Skip to content

Commit

Permalink
Retrieve and show annotations (#180)
Browse files Browse the repository at this point in the history
* Updated backend to include annotation retrieval for dependencies and ui

* Updated unit tests and integration to pass
  • Loading branch information
SeriousHorncat authored Sep 16, 2024
1 parent cc99a41 commit 2f88d17
Show file tree
Hide file tree
Showing 19 changed files with 2,689 additions and 865 deletions.
3 changes: 1 addition & 2 deletions backend/src/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def queue_annotation_tasks(self, analysis: Analysis, annotation_task_queue: Anno
def process_tasks(
annotation_queue: AnnotationQueue, analysis_name: str, genomic_unit_collection: GenomicUnitCollection,
analysis_collection: AnalysisCollection
): # pylint: disable=too-many-branches
): # pylint: disable=too-many-branches,too-many-locals
"""Processes items that have been added to the queue"""
logger.info("%s Processing annotation tasks queue ...", annotation_log_label())

Expand All @@ -110,7 +110,6 @@ def process_tasks(
if annotation_unit.has_dependencies():
missing_dependencies = annotation_unit.get_missing_dependencies()
for missing_dataset_name in missing_dependencies:
# missing_dataset_name
dependency_dataset = analysis_collection.get_manifest_dataset_config(
analysis_name, missing_dataset_name
)
Expand Down
5 changes: 0 additions & 5 deletions backend/src/core/dataset_manifest.py

This file was deleted.

4 changes: 3 additions & 1 deletion backend/src/repository/analysis_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def add_dataset_to_manifest(self, analysis_name: str, annotation_unit: Annotatio
return updated_document['manifest']

def get_manifest_dataset_config(self, analysis_name: str, dataset_name: str):
""" Returns an individual dataset manifest """
dataset_attribute = f"manifest.{dataset_name}"
result = self.collection.find_one({"name": analysis_name, dataset_attribute: {'$exists': True}})

Expand All @@ -159,9 +160,10 @@ def get_manifest_dataset_config(self, analysis_name: str, dataset_name: str):
}

def get_dataset_manifest(self, analysis_name):
    """
    Returns the analysis' dataset manifest for annotation versions and sources.

    The analysis is looked up by name; None is returned when no analysis with that
    name is found, otherwise the value stored under the analysis' 'manifest' key.
    """
    analysis = self.find_by_name(analysis_name)
    if analysis is None:
        # Explicit None keeps every return path consistent with the value-returning path below.
        return None

    return analysis['manifest']

Expand Down
40 changes: 14 additions & 26 deletions backend/src/repository/genomic_unit_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# pylint: disable=too-few-public-methods
# Disabling too few public methods due to utilizing Pydantic/FastAPI BaseSettings class
from bson import ObjectId
from pymongo import ReturnDocument

from src.core.annotation_unit import AnnotationUnit

Expand Down Expand Up @@ -50,7 +51,7 @@ def __find_annotation_query__(self, annotation_unit: AnnotationUnit):
'gene': 'VMA21',
'annotations.CADD': {'$exists': True },
'annotations.CADD.data_source': 'Ensembl',
'annotations.CADD.version': 'HARD_CODED_VERSION'
'annotations.CADD.version': '112'
}
"""
find_query = self.__find_genomic_unit_query__(annotation_unit)
Expand Down Expand Up @@ -101,22 +102,12 @@ def find_genomic_unit_annotation_value(self, annotation_unit: AnnotationUnit):
dataset_name = annotation_unit.get_dataset_name()
find_query = self.__find_annotation_query__(annotation_unit)

if dataset_name == "HPO_NCBI_GENE_ID" and annotation_unit.get_genomic_unit() == "DLG4":
logger.info("\n\n\n")
logger.info(f"{find_query}")
logger.fino("\n\n\n")

projection = {f"annotations.{dataset_name}.value.$": 1, "_id": 0}
result = self.collection.find_one(find_query, projection)

if result is None:
return None

if dataset_name == "HPO_NCBI_GENE_ID" and annotation_unit.get_genomic_unit() == "DLG4":
logger.info("\n\n\n")
logger.info(f"{result}")
logger.fino("\n\n\n")

return next((
annotation[dataset_name][0].get('value')
for annotation in result['annotations']
Expand Down Expand Up @@ -146,10 +137,10 @@ def add_transcript_to_genomic_unit(self, genomic_unit, transcript_id):
def update_genomic_unit_annotation_by_mongo_id(self, genomic_unit_document):
    """
    Takes a genomic unit and overwrites the existing object based on the object's id.

    The document's '_id' is used to locate the stored genomic unit and every field of
    genomic_unit_document is written with a single '$set'. Returns the document as it
    exists after the update.
    """
    genomic_unit_id = genomic_unit_document['_id']

    # One find_one_and_update both writes and reads back the result; a separate
    # update_one beforehand would only repeat the same '$set'.
    return self.collection.find_one_and_update(
        {'_id': ObjectId(str(genomic_unit_id))},
        {'$set': genomic_unit_document},
        return_document=ReturnDocument.AFTER,
    )

def annotate_genomic_unit(self, genomic_unit, genomic_annotation):
"""
Expand All @@ -165,6 +156,8 @@ def annotate_genomic_unit(self, genomic_unit, genomic_annotation):
}]
}

updated_document = None

if 'transcript_id' in genomic_annotation:
genomic_unit_document = self.find_genomic_unit_with_transcript_id(
genomic_unit, genomic_annotation['transcript_id']
Expand All @@ -180,14 +173,14 @@ def annotate_genomic_unit(self, genomic_unit, genomic_annotation):
if transcript['transcript_id'] == genomic_annotation['transcript_id']:
transcript['annotations'].append(annotation_data_set)

self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
updated_document = self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

else:
genomic_unit_document = self.find_genomic_unit(genomic_unit)
genomic_unit_document['annotations'].append(annotation_data_set)
self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
updated_document = self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

return
return updated_document

def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
""" Ensures that an annotation is created for the annotation image upload and only one image is allowed """
Expand All @@ -198,8 +191,7 @@ def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
for annotation in genomic_unit_document['annotations']:
if data_set in annotation:
annotation[data_set][0]['value'].append(genomic_annotation['value'])
self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
return
return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

annotation_data_set = {
genomic_annotation['data_set']: [{
Expand All @@ -210,9 +202,7 @@ def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
}

genomic_unit_document['annotations'].append(annotation_data_set)
self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

return
return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

def update_genomic_unit_file_annotation(self, genomic_unit, data_set, annotation_value, file_id_old):
""" Replaces existing annotation image with new image """
Expand Down Expand Up @@ -243,9 +233,7 @@ def remove_genomic_unit_file_annotation(self, genomic_unit, data_set, file_id):
annotation[data_set][0]['value'].pop(i)
break

self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

return
return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)

def create_genomic_unit(self, genomic_unit):
"""
Expand Down
122 changes: 122 additions & 0 deletions backend/src/routers/analysis_annotation_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
""" Analysis endpoint routes that provide an interface to interact with an Analysis' discussions """
import logging

from fastapi import (APIRouter, Depends, HTTPException)

from ..dependencies import database
from ..enums import GenomicUnitType

logger = logging.getLogger(__name__)

router = APIRouter(tags=["analysis annotations"], dependencies=[Depends(database)])


@router.get("/{analysis_name}/gene/{gene}")
def get_annotations_by_gene(analysis_name: str, gene: str, repositories=Depends(database)):
    """
    Returns the annotations of a gene, resolved against the analysis' dataset manifest.

    The analysis' manifest is fetched from the "analysis" repository and the gene's genomic unit
    document from the "genomic_unit" repository. The manifest then decides, per dataset, which of
    the stored annotation entries supplies the returned value.

    Raises:
        HTTPException: 404 when the analysis has no manifest (the repository returns None when the
            analysis is not found) or when no genomic unit document exists for the gene.
    """
    dataset_manifest = repositories["analysis"].get_dataset_manifest(analysis_name)
    if dataset_manifest is None:
        # Without a manifest there is nothing to resolve versions against; report it as a
        # missing resource instead of failing later while iterating None.
        raise HTTPException(status_code=404, detail=f"Analysis '{analysis_name}' not found.")

    genomic_unit = {
        'type': GenomicUnitType.GENE,
        'unit': gene,
    }
    genomic_unit_json = repositories["genomic_unit"].find_genomic_unit(genomic_unit)
    if genomic_unit_json is None:
        raise HTTPException(status_code=404, detail=f"Gene'{gene}' annotations not found.")

    manifest = AnalysisDatasetManfiest(dataset_manifest)
    return manifest.retrieve_annotations(genomic_unit_json['annotations'])


@router.get("/{analysis_name}/hgvsVariant/{variant}")
def get_annotations_by_hgvs_variant(analysis_name: str, variant: str, repositories=Depends(database)):
    """
    Returns the annotations of an HGVS variant and of each of its transcripts, resolved against
    the analysis' dataset manifest.

    The response is the variant's own dataset/value mapping with an added "transcripts" key that
    holds one dataset/value mapping per transcript stored on the variant's genomic unit document.

    Raises:
        HTTPException: 404 when the analysis has no manifest (the repository returns None when the
            analysis is not found) or when no genomic unit document exists for the variant.
    """
    dataset_manifest = repositories["analysis"].get_dataset_manifest(analysis_name)
    if dataset_manifest is None:
        # Without a manifest there is nothing to resolve versions against; report it as a
        # missing resource instead of failing later while iterating None.
        raise HTTPException(status_code=404, detail=f"Analysis '{analysis_name}' not found.")

    genomic_unit = {
        'type': GenomicUnitType.HGVS_VARIANT,
        'unit': variant,
    }
    genomic_unit_json = repositories["genomic_unit"].find_genomic_unit(genomic_unit)
    if genomic_unit_json is None:
        raise HTTPException(status_code=404, detail=f"Variant'{variant}' annotations not found.")

    manifest = AnalysisDatasetManfiest(dataset_manifest)
    annotations = manifest.retrieve_annotations(genomic_unit_json['annotations'])

    # A document without a 'transcripts' key yields an empty list rather than a KeyError.
    transcript_annotation_list = [
        manifest.retrieve_annotations(transcript['annotations'])
        for transcript in genomic_unit_json.get('transcripts', [])
    ]

    return {**annotations, "transcripts": transcript_annotation_list}


class AnalysisDatasetManfiest():
    """
    Retrieves dataset annotations based on an analysis' manifest.

    An analysis' manifest is a list with one entry per dataset; each entry is keyed by the
    dataset's name and records which data source and version the analysis uses, as in the
    following example:
        [{
            'CADD': {
                'data_source': 'Ensembl',
                'version': '120'
            }
        }]
    """

    def __init__(self, analysis_dataset_manifest):
        """
        Initializes with a list of analysis' dataset manifest entries.

        A missing manifest (None) is treated as an empty one, so every dataset falls back to its
        first stored annotation instead of failing on iteration.
        """
        self.manifest = analysis_dataset_manifest if analysis_dataset_manifest is not None else []

    def retrieve_annotations(self, unit_annotations):
        """
        Extracts annotations from the provided list of unit annotations and returns a dictionary
        of dataset names and their corresponding values.

        unit_annotations is a list of annotations for a genomic unit, where each annotation is
        structured as the following example:
            {
                'CADD': [{
                    'data_source': 'Ensembl',
                    'version': '112',
                    'value': 0.5
                }]
            }

        For each dataset the entry matching the manifest's data source and version supplies the
        value; when no entry matches (or the dataset is absent from the manifest) the first stored
        entry is used. Datasets with no stored entries are omitted from the result.
        """
        annotations = {}
        for annotation_json in unit_annotations:
            for dataset, dataset_annotations in annotation_json.items():
                if not dataset_annotations:
                    continue

                matched = self.get_value_for_dataset(dataset, dataset_annotations)
                chosen = matched if matched is not None else dataset_annotations[0]
                annotations[dataset] = chosen['value']
        return annotations

    def get_value_for_dataset(self, dataset_name: str, annotation_json_list: list):
        """
        Retrieves the annotation according to the analysis' dataset manifest entry matching the
        dataset's name, 'data_source', and 'version'.

        None is returned when there isn't an entry for the dataset in the manifest, or when none
        of the provided annotations matches the manifest entry.
        """
        dataset_config = next((entry for entry in self.manifest if dataset_name in entry), None)
        if dataset_config is None:
            return None

        configuration = dataset_config[dataset_name]

        # .get() lets an annotation stored without 'data_source'/'version' count as a non-match
        # rather than raising KeyError.
        return next((
            annotation for annotation in annotation_json_list
            if annotation.get('data_source') == configuration['data_source'] and
            annotation.get('version') == configuration['version']
        ), None)
2 changes: 2 additions & 0 deletions backend/src/routers/analysis_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
from ..models.user import VerifyUser
from ..security.security import get_authorization, get_current_user

from . import analysis_annotation_router
from . import analysis_attachment_router
from . import analysis_discussion_router
from . import analysis_section_router

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/analysis", dependencies=[Depends(database)])
router.include_router(analysis_annotation_router.router)
router.include_router(analysis_attachment_router.router)
router.include_router(analysis_discussion_router.router)
router.include_router(analysis_section_router.router)
Expand Down
Loading

0 comments on commit 2f88d17

Please sign in to comment.