Merge pull request #468 from tira-io/development
Development
mam10eks authored Jul 8, 2023
2 parents bee943c + 1afc64f commit a2daa56
Showing 46 changed files with 2,335 additions and 334 deletions.
4 changes: 2 additions & 2 deletions application/Makefile
@@ -2,8 +2,8 @@

VENV_NAME?=venv
PYTHON=${PWD}/${VENV_NAME}/bin/python3
VERSION_APPLICATION=0.0.64
VERSION_GRPC=0.0.64
VERSION_APPLICATION=0.0.67
VERSION_GRPC=0.0.67

.DEFAULT: help
help:
5 changes: 5 additions & 0 deletions application/src/tira/checks.py
@@ -39,6 +39,11 @@ def func_wrapper(request, *args, **kwargs):
if role == auth.ROLE_ADMIN or role == auth.ROLE_TIRA:
return func(request, *args, **kwargs)

# SERP endpoint is allowed for runs that are published and unblinded
if (request.path_info.startswith('serp/') or request.path_info.startswith('/serp/')) and run_id \
and run_id in request.path_info and model.run_is_public_and_unblinded(run_id):
return func(request, *args, **kwargs)

if 'run_id_1' in kwargs or 'run_id_2' in kwargs:
return HttpResponseNotAllowed("Access forbidden.")

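The new branch grants anonymous access to SERP pages only when the referenced run is published and unblinded. A minimal, self-contained sketch of that path check (the model lookup is stubbed with a lambda; all names are illustrative, not TIRA's API):

def serp_access_allowed(path_info, run_id, run_is_public_and_unblinded):
    # request.path_info normally starts with '/'; the bare 'serp/' variant is a safety net
    is_serp = path_info.startswith('serp/') or path_info.startswith('/serp/')
    return bool(is_serp and run_id and run_id in path_info
                and run_is_public_and_unblinded(run_id))

assert serp_access_allowed('/serp/task-1/user/a/dataset/d/run-1', 'run-1', lambda r: True)
assert not serp_access_allowed('/serp/task-1/user/a/dataset/d/run-1', 'run-1', lambda r: False)
assert not serp_access_allowed('/task/task-1/run-1', 'run-1', lambda r: True)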
215 changes: 148 additions & 67 deletions application/src/tira/data/HybridDatabase.py
@@ -349,6 +349,11 @@ def get_docker_software_by_name(self, name, vm_id, task_id) -> dict:
except modeldb.Dataset.DoesNotExist:
return {}

def run_is_public_and_unblinded(self, run_id):
# Resolve the run that consumed run_id as its input (i.e., the evaluation of run_id)
# and check that run's review: public only if it is published and not blinded.
run_id = modeldb.Run.objects.get(input_run__run_id=run_id).run_id
review = modeldb.Review.objects.get(run_id=run_id)
return review.published and not review.blinded

def get_reranking_docker_softwares(self):
return [self._docker_software_to_dict(i) for i in modeldb.DockerSoftware.objects.filter(ir_re_ranking_input=True)]

@@ -546,6 +551,32 @@ def _docker_software_to_dict(self, ds):
"ir_re_ranking_input": True if ds.ir_re_ranking_input else False
}

def get_count_of_missing_reviews(self, task_id):
prepared_statement = """
SELECT
tira_run.input_dataset_id,
SUM(CASE WHEN tira_review.has_errors = False AND tira_review.has_no_errors = FALSE AND tira_review.has_warnings = FALSE THEN 1 ELSE 0 END) as ToReview,
COUNT(*) as submissions
FROM
tira_run
INNER JOIN
tira_taskhasdataset ON tira_run.input_dataset_id = tira_taskhasdataset.dataset_id
LEFT JOIN
tira_review ON tira_run.run_id = tira_review.run_id
WHERE
tira_taskhasdataset.task_id = %s
GROUP BY
tira_run.input_dataset_id;
"""

ret = []
rows = self.execute_raw_sql_statement(prepared_statement, params=[task_id])
for dataset_id, to_review, submissions in rows:
ret += [{'dataset_id': dataset_id, 'to_review': to_review, 'submissions': submissions}]

return ret

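Because the aggregation is easy to misread (a run counts as "to review" only when all three review flags are false), here is a self-contained sketch of the same query against an in-memory SQLite schema; table and column names follow the statement above, the sample rows are invented:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.executescript('''
CREATE TABLE tira_run (run_id TEXT PRIMARY KEY, input_dataset_id TEXT);
CREATE TABLE tira_taskhasdataset (task_id TEXT, dataset_id TEXT);
CREATE TABLE tira_review (run_id TEXT, has_errors INT, has_no_errors INT, has_warnings INT);
INSERT INTO tira_run VALUES ('run-1', 'ds-1'), ('run-2', 'ds-1');
INSERT INTO tira_taskhasdataset VALUES ('task-1', 'ds-1');
INSERT INTO tira_review VALUES ('run-1', 0, 1, 0), ('run-2', 0, 0, 0);
''')

rows = conn.execute('''
SELECT tira_run.input_dataset_id,
       SUM(CASE WHEN tira_review.has_errors = 0 AND tira_review.has_no_errors = 0
                 AND tira_review.has_warnings = 0 THEN 1 ELSE 0 END) AS to_review,
       COUNT(*) AS submissions
FROM tira_run
INNER JOIN tira_taskhasdataset ON tira_run.input_dataset_id = tira_taskhasdataset.dataset_id
LEFT JOIN tira_review ON tira_run.run_id = tira_review.run_id
WHERE tira_taskhasdataset.task_id = ?
GROUP BY tira_run.input_dataset_id
''', ('task-1',)).fetchall()

print(rows)  # [('ds-1', 1, 2)] -> one of two submissions still needs a review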

def get_docker_softwares_with_runs(self, task_id, vm_id):
def _runs_by_docker_software(ds):
reviews = modeldb.Review.objects.select_related("run", "run__upload", "run__evaluator", "run__input_run",
@@ -613,6 +644,26 @@ def get_irds_docker_software_id(self, task_id, vm_id, software_id, docker_softwa

return ret[0] if len(ret) > 0 else None

def get_evaluations_of_run(self, vm_id, run_id):
prepared_statement = '''
SELECT
evaluation_run.run_id
FROM
tira_run as evaluation_run
INNER JOIN
tira_run as input_run ON evaluation_run.input_run_id = input_run.run_id
LEFT JOIN
tira_upload ON input_run.upload_id = tira_upload.id
LEFT JOIN
tira_software ON input_run.software_id = tira_software.id
LEFT JOIN
tira_dockersoftware ON input_run.docker_software_id = tira_dockersoftware.docker_software_id
WHERE
evaluation_run.input_run_id = %s and evaluation_run.evaluator_id IS NOT NULL
AND (tira_upload.vm_id = %s OR tira_software.vm_id = %s OR tira_dockersoftware.vm_id = %s)
'''
return [i[0] for i in self.execute_raw_sql_statement(prepared_statement, [run_id, vm_id, vm_id, vm_id])]

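The three LEFT JOINs cover the three origins an input run can have (upload, legacy VM software, docker software), and the vm_id conditions restrict the result to evaluations of the caller's own runs. A hypothetical usage sketch (db stands for a HybridDatabase instance; the ids and the returned value are invented):

evaluation_run_ids = db.get_evaluations_of_run('my-team', 'run-123')
# e.g. ['run-123-evaluated-run-1']: each returned id is an evaluator run
# that took 'run-123' as its input and belongs to the vm 'my-team'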
def get_vms_with_reviews(self, dataset_id: str):
""" returns a list of dicts with:
{"vm_id": vm_id,
@@ -726,78 +777,108 @@ def round_if_float(fl):
except ValueError:
return fl

def format_evaluation(r, ks, run_id_to_software_name):
"""
@param r: a queryset of modeldb.Run
@param ks: a list of keys of evaluation parameters
"""
def if_exists(evals):
for k in ks:
ev = evals.filter(run__run_id=run.run_id, measure_key=k).all()
if ev.exists():
yield round_if_float(ev[0].measure_value)
else:
yield "-"

# print(r.all().values())
for run in r:
if run.run_id in exclude:
continue
values = evaluations.filter(run__run_id=run.run_id).all()
if not values.exists():
continue
try:
input_run = run.input_run
software_name = ''
if input_run.software:
vm_id = run.input_run.software.vm.vm_id
elif input_run.docker_software:
vm_id = run.input_run.docker_software.vm.vm_id
software_name = run.input_run.docker_software.display_name
elif input_run.upload:
vm_id = run.input_run.upload.vm.vm_id
software_name = 'Run Upload'
else:
logger.error(
f"The input run {run.run_id} has no vm assigned. Assigning None instead")
vm_id = "None"

except AttributeError as e:
logger.error(f"The vm or software of run {run.run_id} does not exist. Maybe either was deleted?", e)
vm_id = "None"

rev = modeldb.Review.objects.get(run__run_id=run.run_id)

yield {"vm_id": vm_id,
"run_id": run.run_id if '-evaluated-run-' not in run.run_id else run.run_id.split('-evaluated-run-')[1],
'input_run_id': run.input_run.run_id,
"input_software_name": software_name,
'published': rev.published,
'blinded': rev.blinded,
"measures": list(if_exists(evaluations))}

run_id_to_software_name = {}

for run in modeldb.Run.objects.filter(input_dataset=dataset_id).exclude(input_run__isnull=False).all():
docker_software = run.docker_software
if not docker_software:
continue

docker_software_name = docker_software.display_name
if not docker_software_name:

prepared_statement = '''
SELECT
evaluation_run.input_dataset_id, evaluation_run.run_id, input_run.run_id, tira_upload.display_name, tira_upload.vm_id, tira_software.vm_id,
tira_dockersoftware.display_name, tira_dockersoftware.vm_id, tira_evaluation_review.published,
tira_evaluation_review.blinded, tira_run_review.published, tira_run_review.blinded,
tira_evaluation.measure_key, tira_evaluation.measure_value
FROM
tira_run as evaluation_run
INNER JOIN
tira_run as input_run ON evaluation_run.input_run_id = input_run.run_id
LEFT JOIN
tira_upload ON input_run.upload_id = tira_upload.id
LEFT JOIN
tira_software ON input_run.software_id = tira_software.id
LEFT JOIN
tira_dockersoftware ON input_run.docker_software_id = tira_dockersoftware.docker_software_id
LEFT JOIN
tira_review as tira_evaluation_review ON evaluation_run.run_id = tira_evaluation_review.run_id
LEFT JOIN
tira_review as tira_run_review ON input_run.run_id = tira_run_review.run_id
LEFT JOIN
tira_evaluation ON tira_evaluation.run_id = evaluation_run.run_id
WHERE
evaluation_run.input_run_id is not null AND evaluation_run.deleted = FALSE AND input_run.deleted = False
AND <DATASET_ID_STATEMENT>
ORDER BY
tira_evaluation.id ASC;
'''

dataset_id_statement = []
dataset_ids = [dataset_id]
additional_datasets = modeldb.Dataset.objects.get(dataset_id=dataset_id).meta_dataset_of

if additional_datasets:
dataset_ids += [j.strip() for j in additional_datasets.split(',') if j.strip()]

for _ in dataset_ids:
dataset_id_statement += ['evaluation_run.input_dataset_id = %s']
dataset_id_statement = ' OR '.join(dataset_id_statement)
prepared_statement = prepared_statement.replace('<DATASET_ID_STATEMENT>', f'({dataset_id_statement})')

keys = dict()
input_run_to_evaluation = {}
rows = self.execute_raw_sql_statement(prepared_statement, params=dataset_ids)

for dataset_id, run_id, input_run_id, upload_display_name, upload_vm_id, software_vm_id, docker_display_name, \
docker_vm_id, eval_published, eval_blinded, run_published, run_blinded, measure_key, measure_value in rows:

if not measure_key or (not include_unpublished and not eval_published):
continue

run_id_to_software_name[run.run_id] = docker_software_name
if run_id not in input_run_to_evaluation:
input_run_to_evaluation[run_id] = {'measures': {}}

vm_id = 'None'
software_name = ''
pretty_run_id = run_id if '-evaluated-run-' not in run_id else run_id.split('-evaluated-run-')[1]
is_upload, is_software = False, False
if upload_display_name and upload_vm_id:
vm_id = upload_vm_id
is_upload = True
software_name = upload_display_name
elif docker_display_name and docker_vm_id:
vm_id = docker_vm_id
is_software = True
software_name = docker_display_name
elif software_vm_id:
vm_id = software_vm_id
is_software = True

input_run_to_evaluation[run_id]['dataset_id'] = dataset_id
input_run_to_evaluation[run_id]['vm_id'] = vm_id
input_run_to_evaluation[run_id]['input_software_name'] = software_name
input_run_to_evaluation[run_id]['run_id'] = pretty_run_id
input_run_to_evaluation[run_id]['input_run_id'] = input_run_id
input_run_to_evaluation[run_id]['published'] = eval_published
input_run_to_evaluation[run_id]['blinded'] = eval_blinded or run_blinded
input_run_to_evaluation[run_id]['is_upload'] = is_upload
input_run_to_evaluation[run_id]['is_software'] = is_software
input_run_to_evaluation[run_id]['measures'][measure_key] = measure_value
keys[measure_key] = ''

keys = list(keys.keys())
ret = []

for i in input_run_to_evaluation.values():
i['measures'] = [round_if_float(i['measures'].get(k, '-')) for k in keys]
ret += [i]

runs = modeldb.Run.objects.filter(input_dataset=dataset_id).exclude(input_run__isnull=True).all()
evaluations = modeldb.Evaluation.objects.select_related('run', 'run__software__vm').filter(
run__input_dataset__dataset_id=dataset_id).all()
keys = [k['measure_key'] for k in evaluations.values('measure_key').distinct()]
return keys, ret

exclude = {review.run.run_id for review in modeldb.Review.objects.select_related('run').filter(
run__input_dataset__dataset_id=dataset_id, published=False, run__software=None).all()
if not include_unpublished}
return keys, list(format_evaluation(runs, keys, run_id_to_software_name))
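After this rework the function returns the measure keys in first-seen order together with one dict per evaluated run. A small rendering sketch (field names from the code above, sample values invented):

keys = ['ndcg_cut_10', 'recip_rank']
evaluations = [{'vm_id': 'team-a', 'input_software_name': 'BM25', 'run_id': 'run-1',
                'published': True, 'blinded': False, 'measures': [0.42, 0.61]}]

print('\t'.join(['Team', 'Approach', 'Run'] + keys))
for e in evaluations:
    print('\t'.join([e['vm_id'], e['input_software_name'], e['run_id']]
                    + [str(m) for m in e['measures']]))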
def execute_raw_sql_statement(self, prepared_statement, params):
from django.db import connection
ret = []
with connection.cursor() as cursor:
cursor.execute(prepared_statement, params=params)
for i in cursor.fetchall():
ret += [i]

return ret
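A usage sketch for this helper (assuming a configured Django connection): statement and values travel separately, so the database driver escapes the parameters; the <DATASET_ID_STATEMENT> substitution above only splices in generated %s placeholders, never user input.

rows = self.execute_raw_sql_statement(
    'SELECT run_id FROM tira_run WHERE input_dataset_id = %s', params=['ds-1'])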

def get_evaluation(self, run_id):
try:
6 changes: 3 additions & 3 deletions application/src/tira/endpoints/admin_api.py
@@ -453,9 +453,9 @@ def admin_create_group(request, vm_id):
def admin_edit_review(request, dataset_id, vm_id, run_id):
if request.method == "POST":
data = json.loads(request.body)
no_errors = data["no_errors"]
output_error = data["output_error"]
software_error = data.get("software_error", False) # Sometimes the frontend does not send the software error field
no_errors = data.get("no_errors", True)
output_error = data.get("output_error", False)
software_error = data.get("software_error", False)
comment = data["comment"]

# sanity checks
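The switch from indexing to dict.get() is what makes the endpoint tolerant of partial payloads; the difference in isolation:

data = {'comment': 'looks good'}    # frontend omitted the error flags
print(data.get('no_errors', True))  # new behaviour: falls back to True
# data['no_errors']                 # old behaviour: raises KeyError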
30 changes: 27 additions & 3 deletions application/src/tira/endpoints/data_api.py
@@ -16,6 +16,7 @@
import csv
from io import StringIO
from copy import deepcopy
from tira.util import link_to_discourse_team

include_navigation = True if settings.DEPLOYMENT == "legacy" else False

@@ -60,12 +61,20 @@ def get_evaluations_by_dataset(request, context, task_id, dataset_id):
"""
role = context["role"]

ev_keys, evaluations = model.get_evaluations_with_keys_by_dataset(dataset_id, True if role == "admin" else None)
user_vms_for_task = []
_add_user_vms_to_context(request, context, task_id, include_docker_details=False)

if 'user_vms_for_task' in context:
user_vms_for_task = context['user_vms_for_task']
task = model.get_task(task_id, False)
is_ir_task = 'is_ir_task' in task and task['is_ir_task']
is_admin = role == "admin"
ev_keys, evaluations = model.get_evaluations_with_keys_by_dataset(dataset_id, is_admin)

context["task_id"] = task_id
context["dataset_id"] = dataset_id
context["ev_keys"] = ev_keys
context["evaluations"] = evaluations
context["evaluations"] = sorted(evaluations, key=lambda r: r['run_id'])
headers = [{'title': 'Team', 'key': 'vm_id'}, {'title': 'Approach', 'key': 'input_software_name'},
{'title': 'Run', 'key': 'run_id'}]
evaluation_headers = [{'title': k, 'key': k} for k in ev_keys]
@@ -78,7 +87,8 @@ def get_evaluations_by_dataset(request, context, task_id, dataset_id):
runs = []
for i in evaluations:
i = deepcopy(i)
i['link_to_team'] = 'https://www.tira.io/g/tira_vm_' + i['vm_id'] if not i['vm_id'].endswith('-default') else 'https://www.tira.io/u/' + i['vm_id'].split('-default')[0]
i['link_to_team'] = link_to_discourse_team(i['vm_id'])
eval_run_id = i["run_id"]
for k, v in [('input_run_id', 'run_id')]:
i[v] = i[k]
del i[k]
Expand All @@ -90,6 +100,12 @@ def get_evaluations_by_dataset(request, context, task_id, dataset_id):
del i[j]
runs += [i]

if not i['blinded'] and (is_admin or i['vm_id'] in user_vms_for_task or i['published']):
i['link_results_download'] = f'/task/{task_id}/user/{i["vm_id"]}/dataset/{i["dataset_id"]}/download/{eval_run_id}.zip'
i['link_run_download'] = f'/task/{task_id}/user/{i["vm_id"]}/dataset/{i["dataset_id"]}/download/{i["run_id"]}.zip'
if is_ir_task:
i['link_serp'] = f'/serp/{task_id}/user/{i["vm_id"]}/dataset/{i["dataset_id"]}/{i["run_id"]}'

context["runs"] = runs

return JsonResponse({'status': 0, "context": context})
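For reference, one entry of context['runs'] for an unblinded run on an IR task that the caller may download would carry the new link fields roughly like this (field names from the code above, values invented):

run_entry = {
    'vm_id': 'team-a',
    'run_id': 'run-1',  # input run id; the results link uses the evaluation's eval_run_id
    'link_to_team': 'https://www.tira.io/g/tira_vm_team-a',
    'link_results_download': '/task/task-1/user/team-a/dataset/ds-1/download/run-1-evaluated-run-0.zip',
    'link_run_download': '/task/task-1/user/team-a/dataset/ds-1/download/run-1.zip',
    'link_serp': '/serp/task-1/user/team-a/dataset/ds-1/run-1',
}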
@@ -127,6 +143,14 @@ def get_submissions_by_dataset(request, context, task_id, dataset_id):
return JsonResponse({'status': 1, "context": context})


@check_permissions
@check_resources_exist("json")
@add_context
def get_evaluations_of_run(request, context, vm_id, run_id):
context['evaluations'] = model.get_evaluations_of_run(vm_id, run_id)
return JsonResponse({'status': 0, "context": context})


@check_permissions
@check_resources_exist("json")
@add_context
6 changes: 6 additions & 0 deletions application/src/tira/endpoints/organizer_api.py
@@ -43,3 +43,9 @@ def blind(request, vm_id, dataset_id, run_id, value):
context = {"status": "1", "blinded": (not value), "message": f"Blinded is now: {value}"}
return JsonResponse(context)


@check_permissions
@check_resources_exist('json')
def get_count_of_missing_reviews(request, task_id):
context = {"count_of_missing_reviews": model.get_count_of_missing_reviews(task_id)}
return JsonResponse({"status": 0, "context": context})
