Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion qiita_pet/handlers/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

from .study import StudyHandler, StudyCreatorHandler, StudyStatusHandler
from .study_samples import (StudySamplesHandler, StudySamplesInfoHandler,
StudySamplesCategoriesHandler)
StudySamplesCategoriesHandler,
StudySamplesDetailHandler,
StudySampleDetailHandler)
from .study_person import StudyPersonHandler
from .study_preparation import (StudyPrepCreatorHandler,
StudyPrepArtifactCreatorHandler)
Expand All @@ -26,6 +28,9 @@
(r"/api/v1/study/([0-9]+)/samples/categories=([a-zA-Z\-0-9\.:,_]*)",
StudySamplesCategoriesHandler),
(r"/api/v1/study/([0-9]+)/samples", StudySamplesHandler),
(r"/api/v1/study/([0-9]+)/samples/status", StudySamplesDetailHandler),
(r"/api/v1/study/([0-9]+)/sample/([a-zA-Z\-0-9\.]+)/status",
StudySampleDetailHandler),
(r"/api/v1/study/([0-9]+)/samples/info", StudySamplesInfoHandler),
(r"/api/v1/person(.*)", StudyPersonHandler),
(r"/api/v1/study/([0-9]+)/preparation/([0-9]+)/artifact",
Expand Down
94 changes: 94 additions & 0 deletions qiita_pet/handlers/rest/study_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,107 @@
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from collections import defaultdict

from tornado.escape import json_encode, json_decode
import pandas as pd

from qiita_db.handlers.oauth2 import authenticate_oauth
from .rest_handler import RESTHandler


def _sample_details(study, samples):
def detail_maker(**kwargs):
base = {'sample_id': None,
'sample_found': False,
'ebi_sample_accession': None,
'preparation_id': None,
'ebi_experiment_accession': None,
'preparation_visibility': None,
'preparation_type': None}

assert set(kwargs).issubset(set(base)), "Unexpected key to set"

base.update(kwargs)
return base

# cache sample detail for lookup
study_samples = set(study.sample_template.keys())
sample_accessions = study.sample_template.ebi_sample_accessions
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that ebi_sample_accessions will return all available samples with Nones where there is no accession; in other words, len(sample_accessions) == len(study_samples)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dont think that impacts the subsequent use


# cache preparation information that we'll need

# map of {sample_id: [indices, of, light, prep, info, ...]}
sample_prep_mapping = defaultdict(list)
pt_light = []
for idx, pt in enumerate(study.prep_templates()):
pt_light.append((pt.id, pt.ebi_experiment_accessions,
pt.status, pt.data_type()))

for ptsample in pt.keys():
sample_prep_mapping[ptsample].append(idx)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern with this block is that it will always load all samples, even when len(samples) == 1.

Another way to do this could be to first select which preps have the samples you are looking for and then build the details, something like this:

samples_set = set(samples) # not sure if this is required as its own var. 
prep_templates = [pt for pt in study.prep_templates() if set(pt) & samples_set]
...
for idx, pt in enumerate(prep_templates):
    ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the cost of this the same as it's still necessary to iterate over all preps?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes time wise but my concern is the memory (should have said this in my previous message) to store all prep info data in pt_light, in specific due to pt.ebi_experiment_accessions, the other values are pretty small; you can imagine that this can grow a lot for studies like the AGP. However, this is something internal and if you think this is not that large or important we can improve in a future iteration, if it actually becomes a problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay sounds good, last commit should reduce what's cached

details = []
for sample in samples:
if sample in study_samples:
# if the sample exists
sample_acc = sample_accessions.get(sample)

if sample in sample_prep_mapping:
# if the sample is present in any prep, pull out the detail
# specific to those preparations
for pt_idx in sample_prep_mapping[sample]:
ptid, ptacc, ptstatus, ptdtype = pt_light[pt_idx]

details.append(detail_maker(
sample_id=sample,
sample_found=True,
ebi_sample_accession=sample_acc,
preparation_id=ptid,
ebi_experiment_accession=ptacc.get(sample),
preparation_visibility=ptstatus,
preparation_type=ptdtype))
else:
# the sample is not present on any preparations
details.append(detail_maker(
sample_id=sample,
sample_found=True,

# it would be weird to have an EBI sample accession
# but not be present on a preparation...?
ebi_sample_accession=sample_acc))
else:
# the is not present, let's note and move ona
details.append(detail_maker(sample_id=sample))

return details


class StudySampleDetailHandler(RESTHandler):
@authenticate_oauth
def get(self, study_id, sample_id):
study = self.safe_get_study(study_id)
sample_detail = _sample_details(study, [sample_id, ])
self.write(json_encode(sample_detail))
self.finish()


class StudySamplesDetailHandler(RESTHandler):
@authenticate_oauth
def post(self, study_id):
samples = json_decode(self.request.body)

if 'sample_ids' not in samples:
self.fail('Missing sample_id key', 400)
return

study = self.safe_get_study(study_id)
samples_detail = _sample_details(study, samples['sample_ids'])

self.write(json_encode(samples_detail))
self.finish()


class StudySamplesHandler(RESTHandler):

@authenticate_oauth
Expand Down
141 changes: 141 additions & 0 deletions qiita_pet/test/rest/test_sample_detail.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------

from unittest import main, TestCase

from tornado.escape import json_decode

import qiita_db

from qiita_pet.test.rest.test_base import RESTHandlerTestCase
from qiita_pet.handlers.rest.study_samples import _sample_details


class SupportTests(TestCase):
def test_samples_detail(self):
exp = [{'sample_id': '1.SKD7.640191',
'sample_found': True,
'ebi_sample_accession': 'ERS000021',
'preparation_id': 1,
'ebi_experiment_accession': 'ERX0000021',
'preparation_visibility': 'private',
'preparation_type': '18S'},
{'sample_id': '1.SKD7.640191',
'sample_found': True,
'ebi_sample_accession': 'ERS000021',
'preparation_id': 2,
'ebi_experiment_accession': 'ERX0000021',
'preparation_visibility': 'private',
'preparation_type': '18S'},
{'sample_id': 'doesnotexist',
'sample_found': False,
'ebi_sample_accession': None,
'preparation_id': None,
'ebi_experiment_accession': None,
'preparation_visibility': None,
'preparation_type': None}]
obs = _sample_details(qiita_db.study.Study(1),
['1.SKD7.640191', 'doesnotexist'])
self.assertEqual(len(obs), len(exp))
self.assertEqual(obs, exp)


class SampleDetailHandlerTests(RESTHandlerTestCase):
def test_get_missing_sample(self):
exp = [{'sample_id': 'doesnotexist',
'sample_found': False,
'ebi_sample_accession': None,
'preparation_id': None,
'ebi_experiment_accession': None,
'preparation_visibility': None,
'preparation_type': None}, ]

response = self.get('/api/v1/study/1/sample/doesnotexist/status',
headers=self.headers)
self.assertEqual(response.code, 200)
obs = json_decode(response.body)
self.assertEqual(obs, exp)

def test_get_valid_sample(self):
exp = [{'sample_id': '1.SKD7.640191',
'sample_found': True,
'ebi_sample_accession': 'ERS000021',
'preparation_id': 1,
'ebi_experiment_accession': 'ERX0000021',
'preparation_visibility': 'private',
'preparation_type': '18S'},
{'sample_id': '1.SKD7.640191',
'sample_found': True,
'ebi_sample_accession': 'ERS000021',
'preparation_id': 2,
'ebi_experiment_accession': 'ERX0000021',
'preparation_visibility': 'private',
'preparation_type': '18S'}]

response = self.get('/api/v1/study/1/sample/1.SKD7.640191/status',
headers=self.headers)
self.assertEqual(response.code, 200)
obs = json_decode(response.body)
self.assertEqual(obs, exp)

def test_post_samples_status_bad_request(self):
body = {'malformed': 'with garbage'}
response = self.post('/api/v1/study/1/samples/status',
headers=self.headers,
data=body, asjson=True)
self.assertEqual(response.code, 400)

def test_post_samples_status(self):
exp = [{'sample_id': '1.SKD7.640191',
'sample_found': True,
'ebi_sample_accession': 'ERS000021',
'preparation_id': 1,
'ebi_experiment_accession': 'ERX0000021',
'preparation_visibility': 'private',
'preparation_type': '18S'},
{'sample_id': '1.SKD7.640191',
'sample_found': True,
'ebi_sample_accession': 'ERS000021',
'preparation_id': 2,
'ebi_experiment_accession': 'ERX0000021',
'preparation_visibility': 'private',
'preparation_type': '18S'},
{'sample_id': 'doesnotexist',
'sample_found': False,
'ebi_sample_accession': None,
'preparation_id': None,
'ebi_experiment_accession': None,
'preparation_visibility': None,
'preparation_type': None},
{'sample_id': '1.SKM5.640177',
'sample_found': True,
'ebi_sample_accession': 'ERS000005',
'preparation_id': 1,
'ebi_experiment_accession': 'ERX0000005',
'preparation_visibility': 'private',
'preparation_type': '18S'},
{'sample_id': '1.SKM5.640177',
'sample_found': True,
'ebi_sample_accession': 'ERS000005',
'preparation_id': 2,
'ebi_experiment_accession': 'ERX0000005',
'preparation_visibility': 'private',
'preparation_type': '18S'}]

body = {'sample_ids': ['1.SKD7.640191', 'doesnotexist',
'1.SKM5.640177']}
response = self.post('/api/v1/study/1/samples/status',
headers=self.headers,
data=body, asjson=True)
self.assertEqual(response.code, 200)
obs = json_decode(response.body)
self.assertEqual(obs, exp)


if __name__ == '__main__':
main()