Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
SeriousHorncat committed Sep 6, 2024
1 parent 602d91b commit f7395cb
Show file tree
Hide file tree
Showing 10 changed files with 195 additions and 17 deletions.
11 changes: 9 additions & 2 deletions backend/src/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import logging
import queue

from backend.src.repository.analysis_collection import AnalysisCollection
from backend.src.repository.genomic_unit_collection import GenomicUnitCollection

from .annotation_task import AnnotationTaskFactory, VersionAnnotationTask
from ..models.analysis import Analysis
from ..repository.annotation_config_collection import AnnotationConfigCollection
Expand Down Expand Up @@ -83,7 +86,7 @@ def queue_annotation_tasks(self, analysis: Analysis, annotation_task_queue: Anno
annotation_task_queue.put(annotation_unit_queued)

@staticmethod
def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable=too-many-branches
def process_tasks(annotation_queue: AnnotationQueue, analysis_name: str, genomic_unit_collection: GenomicUnitCollection, analysis_collection: AnalysisCollection): # pylint: disable=too-many-branches
"""Processes items that have been added to the queue"""
logger.info("%s Processing annotation tasks queue ...", annotation_log_label())

Expand All @@ -104,8 +107,11 @@ def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable
if annotation_unit.has_dependencies():
missing_dependencies = annotation_unit.get_missing_dependencies()
for missing_dataset_name in missing_dependencies:
# missing_dataset_name
dependency_dataset = analysis_collection.get_manifest_dataset(analysis_name, missing_dataset_name)
dependency_annotation_unit = AnnotationUnit(annotation_unit.genomic_unit, dependency_dataset)
annotation_value = genomic_unit_collection.find_genomic_unit_annotation_value(
annotation_unit.genomic_unit, missing_dataset_name
dependency_annotation_unit
)
if annotation_value:
annotation_unit.set_annotation_for_dependency(missing_dataset_name, annotation_value)
Expand Down Expand Up @@ -154,6 +160,7 @@ def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable
genomic_unit_collection.annotate_genomic_unit(
task.annotation_unit.genomic_unit, annotation
)
analysis_collection.add_dataset_to_manifest(analysis_name, annotation_unit)

except FileNotFoundError as error:
logger.info(
Expand Down
6 changes: 3 additions & 3 deletions backend/src/core/annotation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,17 +240,17 @@ def annotate(self):

def get_annotation_version_from_rest(self):
    """Gets version for rest type and returns the version data.

    Returns:
        str: the fixed temporary manifest version string. No REST call is
        made here; the value is hard-coded.
    """
    # The earlier "REST-VERSION-PLACEHOLDER" assignment was overwritten
    # before it could be read, so only the effective value is kept.
    return "rosalution-temp-manifest-00"

def get_annotation_version_from_rosalution(self):
    """Gets version for rosalution type and returns the version data.

    Returns:
        str: the fixed temporary manifest version string; the value is
        hard-coded.
    """
    # The earlier "ROSALUTION-VERSION-PLACEHOLDER" assignment was overwritten
    # before it could be read, so only the effective value is kept.
    return "rosalution-temp-manifest-00"

def get_annotation_version_from_date(self):
    """Gets version for date type and returns the version data.

    Returns:
        str: the fixed temporary manifest version string. No date is computed
        here; the value is hard-coded.
    """
    # The earlier {'date': "DATE-VERSION-PLACEHOLDER"} dict was overwritten
    # before it could be read, so the method has always returned this plain
    # string rather than a dict.
    return "rosalution-temp-manifest-00"

Expand Down
2 changes: 1 addition & 1 deletion backend/src/core/annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class AnnotationUnit:
def __init__(self, genomic_unit, dataset):
    """Builds an annotation unit from a genomic unit and a dataset mapping.

    Args:
        genomic_unit: the genomic unit to annotate; deep-copied so later
            changes to this unit do not affect the caller's object.
        dataset: mapping describing the dataset. Its 'version' entry, when
            present and truthy, becomes this unit's version.
    """
    self.genomic_unit = copy.deepcopy(genomic_unit)
    self.dataset = dataset
    # Use .get so a dataset mapping without a 'version' key falls back to ""
    # instead of raising KeyError (presumably configuration entries that
    # have not been versioned yet - confirm against callers).
    self.version = dataset.get('version') or ""

def get_genomic_unit(self):
"""Returns 'unit' from genomic_unit"""
Expand Down
2 changes: 2 additions & 0 deletions backend/src/models/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class Analysis(BaseAnalysis):
sections: List[Section] = []
discussions: List = []
supporting_evidence_files: List = []
manifest: List = [
]

def units_to_annotate(self):
"""Returns the types of genomic units within the analysis"""
Expand Down
26 changes: 26 additions & 0 deletions backend/src/repository/analysis_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from pymongo import ReturnDocument

from backend.src.core.annotation_unit import AnnotationUnit

from ..models.analysis import Section
from ..models.event import Event
from ..enums import EventType
Expand Down Expand Up @@ -130,6 +132,30 @@ def get_genomic_units(self, analysis_name: str):

return genomic_units_return

def add_dataset_to_manifest(self, analysis_name: str, annotation_unit: AnnotationUnit):
    """Adds this dataset and its version to this Analysis.

    Appends one entry to the analysis' 'manifest' list, keyed by dataset name:
    {<dataset name>: {"data_source": <source>, "version": <version>}}

    Args:
        analysis_name: value of the 'name' field of the analysis to update.
        annotation_unit: supplies the dataset name, source and version.

    Returns:
        The analysis' 'manifest' list as stored after the entry was pushed.
    """
    # The inner value must be a dict with explicit keys. The previous
    # `{source, version}` literal was a Python set, which cannot be encoded
    # to BSON and carries no field names.
    manifest_entry = {
        annotation_unit.get_dataset_name(): {
            "data_source": annotation_unit.get_dataset_source(),
            "version": annotation_unit.get_version(),
        }
    }

    updated_document = self.collection.find_one_and_update(
        {"name": analysis_name},
        {"$push": {"manifest": manifest_entry}},
        return_document=ReturnDocument.AFTER,
    )

    return updated_document['manifest']

def get_manifest_dataset(self, analysis_name: str, dataset_name: str):
    """Finds the analysis whose manifest has an entry for the dataset.

    Queries for a document whose 'name' equals analysis_name and in which the
    field 'manifest.<dataset_name>' exists.

    Returns:
        Whatever the collection's find_one returns for that query - the whole
        matching analysis document, not only the manifest entry.
    """
    manifest_query = {"name": analysis_name}
    manifest_query[f"manifest.{dataset_name}"] = {'$exists': True}
    return self.collection.find_one(manifest_query)


def create_analysis(self, analysis_data: dict):
"""Creates a new analysis if the name does not already exist"""
if self.collection.find_one({"name": analysis_data["name"]}) is not None:
Expand Down
21 changes: 15 additions & 6 deletions backend/src/repository/genomic_unit_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ def __find_genomic_unit_query__(self, annotation_unit: AnnotationUnit):
"""
# find_query = {
# 'gene': 'VMA21',
# 'annotations.CADD': {'$exists': True },
# 'annotations.CADD.data_source': 'Ensembl',
# 'annotations.CADD.version': 'HARD_CODED_VERSION'
#}}
#}
"""
genomic_unit_type_string = annotation_unit.get_genomic_unit_type_string()
genomic_unit_name = annotation_unit.get_genomic_unit()
Expand All @@ -54,7 +51,7 @@ def __find_annotation_query__(self, annotation_unit: AnnotationUnit):
'annotations.CADD': {'$exists': True },
'annotations.CADD.data_source': 'Ensembl',
'annotations.CADD.version': 'HARD_CODED_VERSION'
}}
}
"""
find_query = self.__find_genomic_unit_query__(annotation_unit)
data_set_name = annotation_unit.get_dataset_name()
Expand All @@ -78,7 +75,7 @@ def annotation_exist(self, annotation_unit: AnnotationUnit):
data_set_name = annotation_unit.get_dataset_name()
dataset_version = annotation_unit.get_version()
dataset_source = annotation_unit.get_dataset_source()

find_query = self.__find_genomic_unit_query__(annotation_unit)

if annotation_unit.is_transcript_dataset():
Expand All @@ -103,11 +100,22 @@ def find_genomic_unit_annotation_value(self, annotation_unit: AnnotationUnit):

dataset_name = annotation_unit.get_dataset_name()
find_query = self.__find_annotation_query__(annotation_unit)

if dataset_name == "HPO_NCBI_GENE_ID" and annotation_unit.get_genomic_unit() == "DLG4":
logger.info("\n\n\n")
logger.info(f"{find_query}")
logger.fino("\n\n\n")

projection = {f"annotations.{dataset_name}.value.$": 1, "_id": 0}
result = self.collection.find_one(find_query, projection)

if result is None:
return None

if dataset_name == "HPO_NCBI_GENE_ID" and annotation_unit.get_genomic_unit() == "DLG4":
logger.info("\n\n\n")
logger.info(f"{result}")
logger.fino("\n\n\n")

return next((
annotation[dataset_name][0].get('value')
Expand Down Expand Up @@ -247,6 +255,7 @@ def create_genomic_unit(self, genomic_unit):
# Make sure the genomic unit doesn't already exist
if self.collection.find_one(genomic_unit):
logging.info("Genomic unit already exists, skipping creation")
return

self.collection.insert_one(genomic_unit)
return
32 changes: 31 additions & 1 deletion backend/tests/fixtures/analysis-CPAM0002.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,42 @@
"name": "CPAM0002",
"description": "Vacuolar myopathy with autophagy, X-linked vacuolar myopathy with autophagy",
"nominated_by": "Dr. Person One",
"dataset_configuration": [
"manifest": [
{
"Polyphen Prediction": {
"data_source": "Ensembl",
"version": "112"
}
}, {
"Entrez Gene Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"HPO_NCBI_GENE_ID": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"Ensembl Gene Id": {
"data_source": "Ensembl",
"version": "112"
}
},{
"ClinGen_gene_url": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"OMIM": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"ClinVar_Variantion_Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}
],
"genomic_units": [
Expand Down
38 changes: 34 additions & 4 deletions backend/tests/fixtures/analysis-CPAM0046.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,43 @@
"name":"CPAM0046",
"description":": LMNA-related congenital muscular dystropy",
"nominated_by":"Dr. Person Two",
"dataset_configuration": [
"manifest": [
{
"Polyphen Prediction": {
"data_source": "Ensembl",
"version": "120"}
}
],
"version": "112"
}
}, {
"Entrez Gene Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"HPO_NCBI_GENE_ID": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"Ensembl Gene Id": {
"data_source": "Ensembl",
"version": "112"
}
},{
"ClinGen_gene_url": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"OMIM": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"ClinVar_Variantion_Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
"genomic_units":[
{
"gene":"LMNA",
Expand Down
37 changes: 37 additions & 0 deletions backend/tests/fixtures/analysis-CPAM0047.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,43 @@
"name":"CPAM0047",
"description":"Congenital variant of Rett syndrome",
"nominated_by":"CMT4B3 Foundation",
"manifest": [
{
"Polyphen Prediction": {
"data_source": "Ensembl",
"version": "112"
}
}, {
"Entrez Gene Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"HPO_NCBI_GENE_ID": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"Ensembl Gene Id": {
"data_source": "Ensembl",
"version": "112"
}
},{
"ClinGen_gene_url": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"OMIM": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"ClinVar_Variantion_Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
"genomic_units":[
{
"gene":"SBF1",
Expand Down
37 changes: 37 additions & 0 deletions backend/tests/fixtures/analysis-CPAM0112.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,43 @@
"name": "CPAM0112",
"description": "",
"nominated_by": "",
"manifest": [
{
"Polyphen Prediction": {
"data_source": "Ensembl",
"version": "112"
}
}, {
"Entrez Gene Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"HPO_NCBI_GENE_ID": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"Ensembl Gene Id": {
"data_source": "Ensembl",
"version": "112"
}
},{
"ClinGen_gene_url": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"OMIM": {
"data_source": "Ensembl",
"version": "2024-09-06"
}
},{
"ClinVar_Variantion_Id": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
"genomic_units": [
{
"gene": "VMA21",
Expand Down

0 comments on commit f7395cb

Please sign in to comment.