Skip to content

Commit

Permalink
fixed creating multiple genomic units when uploading twice, investi…
Browse files Browse the repository at this point in the history
…gating why transcripts are showing as not existing when they do
  • Loading branch information
SeriousHorncat committed Sep 18, 2024
1 parent ec04208 commit 325e59f
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 9 deletions.
6 changes: 6 additions & 0 deletions backend/src/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ class GenomicUnitType(str, Enum):
HGVS_VARIANT = "hgvs_variant"
INVALID = "invalid"

@classmethod
def string_types(cls):
    """Return the string values of the genomic unit types used as unit keys.

    Every element is the plain ``str`` value of the member rather than the
    enum member itself, so the result can be compared against or intersected
    with ordinary dictionary keys and always yields plain strings.
    ``INVALID`` is intentionally left out.

    Returns:
        tuple: the values of GENE, TRANSCRIPT, VARIANT and HGVS_VARIANT,
        in that order.
    """
    return (
        cls.GENE.value,
        cls.TRANSCRIPT.value,
        cls.VARIANT.value,
        cls.HGVS_VARIANT.value,
    )




class AnnotationSourceType(str, Enum):
"""Enumeration of the different types of annotation sources in the configuration"""
Expand Down
25 changes: 17 additions & 8 deletions backend/src/repository/genomic_unit_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from bson import ObjectId
from pymongo import ReturnDocument

from src.enums import GenomicUnitType
from src.core.annotation_unit import AnnotationUnit

# create logger
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -84,6 +84,10 @@ def annotation_exist(self, annotation_unit: AnnotationUnit):
'%s (%s): dataset - %s', annotation_unit.to_name_string(), annotation_unit.get_version(),
hgvs_genomic_unit
)

if 'transcripts' not in hgvs_genomic_unit or len(hgvs_genomic_unit['transcripts']) == 0:
return False

for transcript in hgvs_genomic_unit['transcripts']:
dataset_in_transcript_annotation = next((
annotation for annotation in transcript['annotations']
Expand All @@ -106,12 +110,8 @@ def find_genomic_unit_annotation_value(self, annotation_unit: AnnotationUnit):
find_query = self.__find_annotation_query__(annotation_unit)
projection = {f"annotations.{dataset_name}.value.$": 1, "_id": 0}

# logger.info('find query: %s', find_query)
# logger.info('projection: %s', projection)
result = self.collection.find_one(find_query, projection)

# logger.info('retrieved the genomic unit value for "%s"', result);

if result is None:
return None

Expand Down Expand Up @@ -246,10 +246,19 @@ def create_genomic_unit(self, genomic_unit):
"""
Takes a genomic_unit and adds it to the collection if it doesn't already exist (exact match).
"""
type_to_save = GenomicUnitType.string_types() & genomic_unit.keys()

if (len(type_to_save) != 1):
logger.error(
'Failed to create new Genomic Unit "%s", contains more then one genomic_unit type',genomic_unit
)
genomic_unit_type = type_to_save.pop()
find_query = {
genomic_unit_type: genomic_unit[genomic_unit_type]
}

# Make sure the genomic unit doesn't already exist
if self.collection.find_one(genomic_unit):
logging.info("Genomic unit already exists, skipping creation")
if self.collection.find_one(find_query):
logger.info("Genomic unit already exists, skipping creation")
return

self.collection.insert_one(genomic_unit)
Expand Down
14 changes: 13 additions & 1 deletion backend/tests/fixtures/annotations-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,17 @@
"versioning_type": "rest",
"version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
}
},
{
"data_set": "transcript_id",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }",
"versioning_type": "rest",
"version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
}
]

0 comments on commit 325e59f

Please sign in to comment.