Skip to content

Commit

Permalink
Merge branch 'master' into demographic-entity
Browse files Browse the repository at this point in the history
  • Loading branch information
znatty22 committed Feb 1, 2018
2 parents eeecf1f + 8838cce commit 10e3831
Show file tree
Hide file tree
Showing 4 changed files with 268 additions and 0 deletions.
Empty file.
35 changes: 35 additions & 0 deletions dataservice/api/genomic_file/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dataservice.extensions import db
from dataservice.api.common.model import Base

# TODO May want to change all uuid strings to the postgres UUID type later on


class GenomicFile(db.Model, Base):
    """
    GenomicFile entity, stored in the ``genomic_file`` table.

    Columns declared on this model:

    :param file_name: Name of file
    :param file_type: Type of genomic file (e.g. aligned reads)
    :param file_format: Format of file
    :param file_url: Location of file
    :param md5sum: 128 bit md5 hash of file, unique across genomic files
    :param sequencing_experiment_id: kf_id of the sequencing experiment
        the file belongs to (required)

    Fields expected to come from ``Base`` (not declared here, so confirm
    against dataservice.api.common.model):

    :param _id: Unique id assigned by RDBMS
    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param uuid: UUID assigned to file from Gen3
    """

    __tablename__ = 'genomic_file'

    # Descriptive file attributes, all free text
    file_name = db.Column(db.Text())
    file_type = db.Column(db.Text())
    file_format = db.Column(db.Text())
    file_url = db.Column(db.Text())

    # TODO Change to use UUID for md5sum later on
    # See link for why md5sum should use uuid type
    # https://dba.stackexchange.com/questions/115271/what-is-the-optimal-data-type-for-an-md5-field
    md5sum = db.Column(db.String(32), unique=True)

    # Every genomic file must reference an existing sequencing experiment
    sequencing_experiment_id = db.Column(
        db.String(8),
        db.ForeignKey('sequencing_experiment.kf_id'),
        nullable=False)
6 changes: 6 additions & 0 deletions dataservice/api/sequencing_experiment/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataservice.extensions import db
from dataservice.api.common.model import Base
from dataservice.api.genomic_file.models import GenomicFile


class SequencingExperiment(db.Model, Base):
Expand Down Expand Up @@ -43,3 +44,8 @@ class SequencingExperiment(db.Model, Base):
mean_read_length = db.Column(db.Float())
aliquot_id = db.Column(db.String(8),
db.ForeignKey('aliquot.kf_id'), nullable=False)
genomic_files = db.relationship(GenomicFile,
cascade="all, delete-orphan",
backref=db.backref(
'sequencing_experiments',
lazy=True))
227 changes: 227 additions & 0 deletions tests/genomic_file/test_geonmic_file_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
import uuid

from sqlalchemy.exc import IntegrityError

from dataservice.extensions import db
from dataservice.api.participant.models import Participant
from dataservice.api.sample.models import Sample
from dataservice.api.aliquot.models import Aliquot
from dataservice.api.sequencing_experiment.models import SequencingExperiment
from dataservice.api.genomic_file.models import GenomicFile
from tests.utils import FlaskTestCase


class ModelTest(FlaskTestCase):
"""
Test GenomicFile database model
"""

def test_create_and_find(self):
    """
    Test creating genomic files directly and finding them again

    Files are added to the session with an explicit
    sequencing_experiment_id, then read back both through the
    experiment's genomic_files relationship and by md5sum.
    """
    # Create genomic file dependent entities
    self._create_save_dependents()
    self.assertEqual(Participant.query.count(), 1)
    self.assertEqual(Sample.query.count(), 2)
    self.assertEqual(Aliquot.query.count(), 4)
    self.assertEqual(SequencingExperiment.query.count(), 4)

    # Create genomic files for just one experiment
    experiment = SequencingExperiment.query.all()[0]
    kf_id = experiment.kf_id
    kwargs_dict = {}
    for i in range(2):
        kwargs = {
            'file_name': 'file_{}'.format(i),
            'file_type': 'submitted aligned read',
            'file_format': '.cram',
            'file_url': 's3://file_{}'.format(i),
            # 32 hex characters, the same width as an md5 digest.
            # str(uuid.uuid4()) is 36 characters (it contains hyphens)
            # and does not fit the String(32) md5sum column.
            'md5sum': uuid.uuid4().hex,
            'sequencing_experiment_id': kf_id
        }
        kwargs_dict[kwargs['md5sum']] = kwargs
        # Add genomic file to db session
        db.session.add(GenomicFile(**kwargs))
    db.session.commit()

    # Check database
    experiment = SequencingExperiment.query.filter_by(
        kf_id=kf_id).one()
    self.assertEqual(len(experiment.genomic_files), 2)

    # Check all input field values with persisted field values
    # for each genomic file
    for _md5sum, kwargs in kwargs_dict.items():
        gf = GenomicFile.query.filter_by(md5sum=_md5sum).one()
        for k, v in kwargs.items():
            self.assertEqual(getattr(gf, k), v)

def test_create_via_experiment(self):
    """
    Test create genomic files by appending them to a sequencing
    experiment's genomic_files list
    """
    # The helper saves the dependents plus two files on one experiment
    experiment_id, files_by_md5 = self._create_save_genomic_files()

    # Reload the experiment and check how many files it now owns
    query = SequencingExperiment.query.filter_by(kf_id=experiment_id)
    experiment = query.one()
    self.assertEqual(len(experiment.genomic_files), 2)

    # Every persisted file must match the values it was created with
    for md5, expected in files_by_md5.items():
        saved = GenomicFile.query.filter_by(md5sum=md5).one()
        for field, value in expected.items():
            self.assertEqual(getattr(saved, field), value)

def test_update(self):
    """
    Test update genomic file
    """
    # Create and save genomic files and dependent entities
    _, files_by_md5 = self._create_save_genomic_files()

    # Take the first saved file's values and change two of them
    first_md5 = list(files_by_md5.keys())[0]
    updates = files_by_md5[first_md5]
    updates['file_name'] = 'updated file name'
    updates['file_type'] = 'updated file type'

    # Apply every value to the persisted object and save
    target = GenomicFile.query.filter_by(md5sum=updates['md5sum']).one()
    for field, value in updates.items():
        setattr(target, field, value)
    db.session.commit()

    # Re-query and verify every field against what was written
    reloaded = GenomicFile.query.filter_by(md5sum=updates['md5sum']).one()
    for field, value in updates.items():
        self.assertEqual(getattr(reloaded, field), value)

def test_delete(self):
    """
    Test delete existing genomic files one by one through the session
    """
    # Create and save genomic files and dependent entities
    experiment_id, _ = self._create_save_genomic_files()

    def load_experiment():
        # Fetch the experiment that owns the genomic files
        return SequencingExperiment.query.filter_by(
            kf_id=experiment_id).one()

    # Mark each of the experiment's files for deletion, then save
    for genomic_file in load_experiment().genomic_files:
        db.session.delete(genomic_file)
    db.session.commit()

    # The experiment itself remains but no longer has any files
    remaining = load_experiment().genomic_files
    self.assertEqual(len(remaining), 0)

def test_delete_via_experiment(self):
    """
    Test delete existing genomic files by deleting the sequencing
    experiment to which they belong
    """
    # Create and save genomic files and dependent entities
    experiment_id, files_by_md5 = self._create_save_genomic_files()

    # Delete the owning experiment
    owner = SequencingExperiment.query.filter_by(
        kf_id=experiment_id).one()
    db.session.delete(owner)
    db.session.commit()

    # None of the experiment's files may still be in the database
    for md5 in files_by_md5:
        leftover = GenomicFile.query.filter_by(md5sum=md5).one_or_none()
        self.assertIsNone(leftover)

def test_not_null_constraint(self):
    """
    Test that a genomic file cannot be created without required parameters
    such as sequencing_experiment_id
    """
    # Create genomic file without foreign key
    gf = GenomicFile()
    db.session.add(gf)

    # session.add() only stages the object. The NOT NULL violation is
    # raised when the INSERT is emitted, so the commit is what must be
    # wrapped. Passing the result of add() to assertRaises asserted nothing.
    with self.assertRaises(IntegrityError):
        db.session.commit()

    # A failed commit leaves the session in a failed state until it is
    # rolled back
    db.session.rollback()

def test_foreign_key_constraint(self):
    """
    Test that a genomic file cannot be created without an existing
    sequencing experiment
    """
    # Create genomic file whose foreign key matches no experiment
    gf = GenomicFile(**{'sequencing_experiment_id': ''})
    db.session.add(gf)

    # session.add() only stages the object. The foreign key violation is
    # raised when the INSERT is emitted, so the commit is what must be
    # wrapped. Passing the result of add() to assertRaises asserted nothing.
    # NOTE(review): assumes the test database enforces foreign keys
    # (e.g. postgres) - confirm against the test configuration
    with self.assertRaises(IntegrityError):
        db.session.commit()

    # A failed commit leaves the session in a failed state until it is
    # rolled back
    db.session.rollback()

def _create_save_genomic_files(self):
    """
    Create and save genomic files to database

    Saves all dependent entities, then appends two genomic files to the
    first sequencing experiment returned by the query.

    :returns: tuple of (kf_id of the experiment that owns the files,
        dict mapping each file's md5sum to the kwargs it was created with)
    """
    # Create and save genomic file dependent entities
    self._create_save_dependents()

    # Create genomic files
    experiment = SequencingExperiment.query.all()[0]
    kwargs_dict = {}
    for i in range(2):
        kwargs = {
            'file_name': 'file_{}'.format(i),
            'file_type': 'submitted aligned read',
            'file_format': '.cram',
            'file_url': 's3://file_{}'.format(i),
            # 32 hex characters, the same width as an md5 digest.
            # str(uuid.uuid4()) is 36 characters (it contains hyphens)
            # and does not fit the String(32) md5sum column.
            'md5sum': uuid.uuid4().hex
        }
        kwargs_dict[kwargs['md5sum']] = kwargs
        # Add genomic file to list in experiment; the relationship
        # supplies sequencing_experiment_id when the session is flushed
        experiment.genomic_files.append(GenomicFile(**kwargs))
    db.session.commit()

    return experiment.kf_id, kwargs_dict

def _create_save_dependents(self):
    """
    Create and save every entity a genomic file depends on

    A single participant is built with its samples, which in turn carry
    their aliquots and sequencing experiments, and the whole graph is
    committed in one go.
    """
    samples = self._create_samples()
    participant = Participant(external_id='p1', samples=samples)
    db.session.add(participant)
    db.session.commit()

def _create_samples(self, total=2):
    """
    Build samples, each with its own freshly created aliquots

    :param total: number of samples to build
    :returns: list of unsaved Sample objects
    """
    samples = []
    for idx in range(total):
        external_id = 's{}'.format(idx)
        samples.append(Sample(external_id=external_id,
                              aliquots=self._create_aliquots()))
    return samples

def _create_aliquots(self, total=2):
    """
    Build aliquots, each with its own freshly created sequencing
    experiments

    :param total: number of aliquots to build
    :returns: list of unsaved Aliquot objects
    """
    aliquots = []
    for idx in range(total):
        external_id = 'a{}'.format(idx)
        aliquots.append(
            Aliquot(external_id=external_id,
                    analyte_type='dna',
                    sequencing_experiments=self._create_experiments()))
    return aliquots

def _create_experiments(self, total=1):
    """
    Build sequencing experiments that all share the same field values

    :param total: number of experiments to build
    :returns: list of unsaved SequencingExperiment objects
    """
    experiments = []
    for _ in range(total):
        experiments.append(SequencingExperiment(
            external_id='se1',
            experiment_strategy='wgs',
            center='broad',
            is_paired_end=True,
            platform='platform'))
    return experiments

0 comments on commit 10e3831

Please sign in to comment.