Skip to content

Commit

Permalink
Remove O(N) queries from workspace status API
Browse files Browse the repository at this point in the history
Previously this was doing O(N) queries, which we accepted when the API
was addede because we did not, at the time, have workspaces with more
than low-10s of jobs.  We now have workspaces with hundreds of jobs
defined.  On a particularly large workspace (long-covid-symptoms) this
endpoint could take as long as 5s (and ~300 queries) to calculate.

This change uses raw SQL so we can make use of a GROUP BY in the CTE to
get the latest actions and then JOIN to it in the outer query to match
those up with their statuses.
  • Loading branch information
ghickman committed Dec 11, 2023
1 parent c4fb0d8 commit a3db731
Showing 1 changed file with 24 additions and 15 deletions.
39 changes: 24 additions & 15 deletions jobserver/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from django.contrib.auth.models import UserManager as DjangoUserManager
from django.contrib.auth.validators import UnicodeUsernameValidator
from django.contrib.postgres.fields import ArrayField
from django.db import models, transaction
from django.db import connection, models, transaction
from django.db.models import Min, Q, prefetch_related_objects
from django.db.models.functions import Lower, NullIf
from django.urls import reverse
Expand Down Expand Up @@ -1363,21 +1363,30 @@ def get_action_status_lut(self, backend=None):
We need to get the latest status for each action run inside this
Workspace.
"""
jobs = Job.objects.filter(job_request__workspace=self)

if backend:
jobs = jobs.filter(job_request__backend__slug=backend)

# get all known actions
actions = set(jobs.values_list("action", flat=True))
action_status_lut = {}
for action in actions:
# get the latest status for an action
job = jobs.filter(action=action).order_by("-created_at").first()
action_status_lut[action] = job.status
return action_status_lut
We're doing this in raw SQL so we can use a CTE to get the latest
actions, and then join that with the Jobs table to get the status of
those actions.
"""
sql = """
WITH latest_actions AS (
SELECT
job.action AS name,
job.status AS status,
row_number() OVER (PARTITION BY job.action ORDER BY job.created_at DESC) AS row_num
FROM
jobserver_job job
INNER JOIN jobserver_jobrequest job_request ON (job.job_request_id = job_request.id)
INNER JOIN jobserver_workspace workspace ON (job_request.workspace_id = workspace.id)
WHERE workspace.name = %s
)
SELECT name, status
FROM latest_actions
WHERE row_num = 1
"""
with connection.cursor() as cursor:
cursor.execute(sql, [self.name])
return dict(cursor.fetchall())

@property
def is_interactive(self):
Expand Down

0 comments on commit a3db731

Please sign in to comment.