Skip to content

Commit 19a9dda

Browse files
committed
fix #1837
1 parent 9eb9dbb commit 19a9dda

File tree

3 files changed

+112
-253
lines changed

3 files changed

+112
-253
lines changed

qiita_pet/handlers/api_proxy/studies.py

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from qiita_db.util import (supported_filepath_types,
1717
get_files_from_uploads_folders)
1818
from qiita_pet.handlers.api_proxy.util import check_access
19+
from qiita_core.exceptions import IncompetentQiitaDeveloperError
1920

2021

2122
def data_types_get_req():
@@ -198,7 +199,7 @@ def study_prep_get_req(study_id, user_id):
198199
def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
199200
"""Returns the uploaded files for the study id categorized by artifact_type
200201
201-
It retrieves the files uploaded for the given study and tries to do a
202+
It retrieves the files uploaded for the given study and tries to
202203
guess on how those files should be added to the artifact of the given
203204
type. Uses information on the prep template to try to do a better guess.
204205
@@ -234,31 +235,48 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type):
234235
remaining = []
235236

236237
uploaded = get_files_from_uploads_folders(study_id)
237-
pt = PrepTemplate(prep_template_id).to_dataframe()
238+
pt = PrepTemplate(prep_template_id)
238239

240+
if pt.study_id != study_id:
241+
raise IncompetentQiitaDeveloperError(
242+
"The requested prep id (%d) doesn't belong to the study "
243+
"(%d)" % (pt.study_id, study_id))
244+
245+
pt = pt.to_dataframe()
239246
ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types
240247
if ft != 'raw_sff')
241248
if any(ftypes_if) and 'run_prefix' in pt.columns:
242249
prep_prefixes = tuple(set(pt['run_prefix']))
243250
num_prefixes = len(prep_prefixes)
244-
for _, filename in uploaded:
245-
if filename.startswith(prep_prefixes):
246-
selected.append(filename)
247-
else:
248-
remaining.append(filename)
251+
# special case for per_sample_FASTQ
252+
if artifact_type == 'per_sample_FASTQ':
253+
# sorting prefixes by length to avoid collisions like: 100 1002
254+
# 10003
255+
prep_prefixes = sorted(prep_prefixes, key=len, reverse=True)
256+
# group files by prefix
257+
sfiles = {p: [f for _, f in uploaded if f.startswith(p)]
258+
for p in prep_prefixes}
259+
for k, v in viewitems(sfiles):
260+
len_files = len(v)
261+
if len_files != 1 and len_files != 2:
262+
remaining.extend(v)
263+
else:
264+
v.sort()
265+
selected.append(v)
266+
else:
267+
len_files = 1
268+
for _, filename in uploaded:
269+
if filename.startswith(prep_prefixes):
270+
selected.append(filename)
271+
else:
272+
remaining.append(filename)
249273
else:
250274
num_prefixes = 0
251275
remaining = [f for _, f in uploaded]
252276

253-
# At this point we can't do anything smart about selecting by default
254-
# the files for each type. The only thing that we can do is assume that
255-
# the first in the supp_file_types list is the default one where files
256-
# should be added in case of 'run_prefix' being present
257-
file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]]
258-
first = supp_file_types[0]
259-
# Note that this works even if `run_prefix` is not in the prep template
260-
# because selected is initialized to the empty list
261-
file_types.insert(0, (first[0], first[1], selected))
277+
# get file_types, format: filetype, required, list of files
278+
file_types = [(t, req, [x[i] for x in selected if i+1 <= len_files])
279+
for i, (t, req) in enumerate(supp_file_types)]
262280

263281
# Create a list of artifacts that the user has access to, in case that
264282
# he wants to import the files from another artifact

qiita_pet/handlers/api_proxy/tests/test_studies.py

Lines changed: 78 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,16 @@
77
# -----------------------------------------------------------------------------
88
from unittest import TestCase, main
99
from datetime import datetime
10-
from os.path import exists, join, basename, isdir
11-
from os import remove, close, mkdir
10+
from os.path import exists, join, isdir
11+
from os import remove
1212
from shutil import rmtree
13-
from tempfile import mkstemp, mkdtemp
13+
from tempfile import mkdtemp
1414

1515
import pandas as pd
1616
import numpy.testing as npt
1717

1818
from qiita_core.util import qiita_test_checker
19+
from qiita_core.exceptions import IncompetentQiitaDeveloperError
1920
import qiita_db as qdb
2021
from qiita_pet.handlers.api_proxy.studies import (
2122
data_types_get_req, study_get_req, study_prep_get_req, study_delete_req,
@@ -282,7 +283,9 @@ def test_study_prep_get_req_failed_EBI(self):
282283
}
283284
metadata = pd.DataFrame.from_dict(metadata_dict, orient='index',
284285
dtype=str)
285-
qdb.metadata_template.sample_template.SampleTemplate.create(
286+
npt.assert_warns(
287+
qdb.exceptions.QiitaDBWarning,
288+
qdb.metadata_template.sample_template.SampleTemplate.create,
286289
metadata, study)
287290

288291
# (C)
@@ -409,6 +412,7 @@ def test_study_files_get_req(self):
409412
'Cannabis Soils (1) - Raw data 1 (1)')]}
410413
self.assertEqual(obs, exp)
411414

415+
# adding a new study for further testing
412416
info = {
413417
"timeseries_type_id": 1,
414418
"metadata_complete": True,
@@ -422,58 +426,86 @@ def test_study_files_get_req(self):
422426
"principal_investigator_id": qdb.study.StudyPerson(3),
423427
"lab_person_id": qdb.study.StudyPerson(1)
424428
}
425-
426429
new_study = qdb.study.Study.create(
427430
qdb.user.User('test@foo.bar'), "Some New Study to get files", [1],
428431
info)
429432

430-
obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')
431-
exp = {'status': 'success',
432-
'message': '',
433-
'remaining': [],
434-
'file_types': [('raw_barcodes', True, []),
435-
('raw_forward_seqs', True, []),
436-
('raw_reverse_seqs', False, [])],
437-
'num_prefixes': 1,
438-
'artifacts': [(1, 'Identification of the Microbiomes for '
439-
'Cannabis Soils (1) - Raw data 1 (1)')]}
433+
# check that you can't call this function using two unrelated
434+
# study_id and prep_template_id
435+
with self.assertRaises(IncompetentQiitaDeveloperError):
436+
study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ')
437+
438+
def test_study_files_get_req_per_sample_FASTQ(self):
439+
study_id = 1
440+
# adding a new prep for testing
441+
PREP = qdb.metadata_template.prep_template.PrepTemplate
442+
prep_info_dict = {
443+
'SKB7.640196': {'run_prefix': 'test_1'},
444+
'SKB8.640193': {'run_prefix': 'test_2'}
445+
}
446+
prep_info = pd.DataFrame.from_dict(prep_info_dict,
447+
orient='index', dtype=str)
448+
pt = npt.assert_warns(
449+
qdb.exceptions.QiitaDBWarning, PREP.create, prep_info,
450+
qdb.study.Study(study_id), "Metagenomic")
451+
452+
# getting the upload folder so we can test
453+
study_upload_dir = join(
454+
qdb.util.get_mountpoint("uploads")[0][1], str(study_id))
455+
456+
# adding just forward per sample FASTQ to the upload folder
457+
filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz']
458+
for f in filenames:
459+
fpt = join(study_upload_dir, f)
460+
open(fpt, 'w', 0).close()
461+
self._clean_up_files.append(fpt)
462+
obs = study_files_get_req(
463+
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
464+
exp = {
465+
'status': 'success', 'num_prefixes': 2, 'artifacts': [],
466+
'remaining': [], 'message': '',
467+
'file_types': [
468+
('raw_forward_seqs', True,
469+
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
470+
('raw_reverse_seqs', False, [])]}
440471
self.assertEqual(obs, exp)
441472

442-
obs = study_files_get_req('admin@foo.bar', new_study.id, 1, 'FASTQ')
443-
exp = {'status': 'success',
444-
'message': '',
445-
'remaining': [],
446-
'file_types': [('raw_barcodes', True, []),
447-
('raw_forward_seqs', True, []),
448-
('raw_reverse_seqs', False, [])],
449-
'num_prefixes': 1,
450-
'artifacts': []}
473+
# let's add reverse
474+
filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz']
475+
for f in filenames:
476+
fpt = join(study_upload_dir, f)
477+
open(fpt, 'w', 0).close()
478+
self._clean_up_files.append(fpt)
479+
obs = study_files_get_req(
480+
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
481+
exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
482+
'remaining': [], 'message': '',
483+
'file_types': [('raw_forward_seqs', True,
484+
['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']),
485+
('raw_reverse_seqs', False,
486+
['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]}
451487
self.assertEqual(obs, exp)
452488

453-
# Create some 'sff' files
454-
upload_dir = qdb.util.get_mountpoint("uploads")[0][1]
455-
study_upload_dir = join(upload_dir, str(new_study.id))
456-
fps = []
457-
458-
for i in range(2):
459-
if not exists(study_upload_dir):
460-
mkdir(study_upload_dir)
461-
fd, fp = mkstemp(suffix=".sff", dir=study_upload_dir)
462-
close(fd)
463-
with open(fp, 'w') as f:
464-
f.write('\n')
465-
fps.append(fp)
466-
467-
self._clean_up_files.extend(fps)
468-
469-
obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'SFF')
470-
exp = {'status': 'success',
489+
# let's add an extra file that matches
490+
filenames = ['test_1.R3.fastq.gz']
491+
for f in filenames:
492+
fpt = join(study_upload_dir, f)
493+
open(fpt, 'w', 0).close()
494+
self._clean_up_files.append(fpt)
495+
obs = study_files_get_req(
496+
'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ')
497+
exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [],
498+
'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz',
499+
'test_1.R3.fastq.gz'],
471500
'message': '',
472-
'remaining': [basename(fpath) for fpath in sorted(fps)],
473-
'file_types': [('raw_sff', True, [])],
474-
'num_prefixes': 0,
475-
'artifacts': []}
501+
'file_types': [('raw_forward_seqs', True,
502+
['test_2.R1.fastq.gz']),
503+
('raw_reverse_seqs', False,
504+
['test_2.R2.fastq.gz'])]}
476505
self.assertEqual(obs, exp)
477506

507+
PREP.delete(pt.id)
508+
509+
478510
if __name__ == '__main__':
479511
main()

0 commit comments

Comments
 (0)