From c7548c6f401b6a62bf1fd0c5828ff29327925bb1 Mon Sep 17 00:00:00 2001
From: Iskandar Sitdikov
Date: Thu, 22 Jun 2023 17:31:00 -0400
Subject: [PATCH] Bug: cluster ids (#714)

---
 gateway/api/schedule.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gateway/api/schedule.py b/gateway/api/schedule.py
index a00947601..e1a1c87dc 100644
--- a/gateway/api/schedule.py
+++ b/gateway/api/schedule.py
@@ -1,5 +1,6 @@
 """Scheduling related functions."""
 import random
+import uuid
 from typing import List
 
 from django.conf import settings
@@ -47,13 +48,14 @@ def execute_job(job: Job) -> Job:
     """
 
     authors_resource = ComputeResource.objects.filter(owner=job.author).first()
+    cluster_name = f"cluster-{job.author.username}-{str(uuid.uuid4())[:8]}"
     if authors_resource:
         job.compute_resource = authors_resource
         job = submit_ray_job(job)
         job.status = Job.PENDING
         job.save()
     else:
-        compute_resource = create_ray_cluster(job.author)
+        compute_resource = create_ray_cluster(job.author, cluster_name=cluster_name)
         if compute_resource:
             # if compute resource was created in time with no problems
             job.compute_resource = compute_resource
@@ -64,7 +66,7 @@ def execute_job(job: Job) -> Job:
         else:
             # if something went wrong
             # try to kill resource if it was allocated
-            kill_ray_cluster(job.author.username)
+            kill_ray_cluster(cluster_name)
             job.status = Job.FAILED
             job.logs = "Something went wrong during compute resource allocation."
             job.save()