diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py index 0cae2abb1..98b4bdebf 100644 --- a/qiita_pet/handlers/api_proxy/studies.py +++ b/qiita_pet/handlers/api_proxy/studies.py @@ -16,6 +16,7 @@ from qiita_db.util import (supported_filepath_types, get_files_from_uploads_folders) from qiita_pet.handlers.api_proxy.util import check_access +from qiita_core.exceptions import IncompetentQiitaDeveloperError def data_types_get_req(): @@ -198,7 +199,7 @@ def study_prep_get_req(study_id, user_id): def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): """Returns the uploaded files for the study id categorized by artifact_type - It retrieves the files uploaded for the given study and tries to do a + It retrieves the files uploaded for the given study and tries to guess on how those files should be added to the artifact of the given type. Uses information on the prep template to try to do a better guess. @@ -234,31 +235,46 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): remaining = [] uploaded = get_files_from_uploads_folders(study_id) - pt = PrepTemplate(prep_template_id).to_dataframe() + pt = PrepTemplate(prep_template_id) + if pt.study_id != study_id: + raise IncompetentQiitaDeveloperError( + "The requested prep id (%d) doesn't belong to the study " + "(%d)" % (pt.study_id, study_id)) + + pt = pt.to_dataframe() ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types if ft != 'raw_sff') if any(ftypes_if) and 'run_prefix' in pt.columns: prep_prefixes = tuple(set(pt['run_prefix'])) num_prefixes = len(prep_prefixes) - for _, filename in uploaded: - if filename.startswith(prep_prefixes): - selected.append(filename) + # sorting prefixes by length to avoid collisions like: 100 1002 + # 10003 + prep_prefixes = sorted(prep_prefixes, key=len, reverse=True) + # group files by prefix + sfiles = {p: [f for _, f in uploaded if f.startswith(p)] + for p in prep_prefixes} + inuse = [y for x in sfiles.values() for y in x] + remaining.extend([f for _, f in uploaded if f not in inuse]) + supp_file_types_len = len(supp_file_types) + + for k, v in viewitems(sfiles): + len_files = len(v) + # if the number of files in the k group is larger than the + # available columns add to the remaining group, if not put them in + # the selected group + if len_files > supp_file_types_len: + remaining.extend(v) else: - remaining.append(filename) + v.sort() + selected.append(v) else: num_prefixes = 0 remaining = [f for _, f in uploaded] - # At this point we can't do anything smart about selecting by default - # the files for each type. The only thing that we can do is assume that - # the first in the supp_file_types list is the default one where files - # should be added in case of 'run_prefix' being present - file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]] - first = supp_file_types[0] - # Note that this works even if `run_prefix` is not in the prep template - # because selected is initialized to the empty list - file_types.insert(0, (first[0], first[1], selected)) + # get file_types, format: filetype, required, list of files + file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)]) + for i, (t, req) in enumerate(supp_file_types)] # Create a list of artifacts that the user has access to, in case that # he wants to import the files from another artifact diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py index 0a0e427a0..e700d37f3 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_studies.py +++ b/qiita_pet/handlers/api_proxy/tests/test_studies.py @@ -7,15 +7,16 @@ # ----------------------------------------------------------------------------- from unittest import TestCase, main from datetime import datetime -from os.path import exists, join, basename, isdir -from os import remove, close, mkdir +from os.path import exists, join, isdir +from os import remove from shutil import rmtree -from tempfile import mkstemp, mkdtemp +from tempfile import mkdtemp import pandas as pd import numpy.testing as npt from qiita_core.util import qiita_test_checker +from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb from qiita_pet.handlers.api_proxy.studies import ( data_types_get_req, study_get_req, study_prep_get_req, study_delete_req, @@ -237,6 +238,8 @@ def test_study_prep_get_req(self): for i in range(4, 0, -1): qdb.artifact.Artifact(i).visibility = "private" + qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) + def test_study_prep_get_req_failed_EBI(self): temp_dir = mkdtemp() self._clean_up_files.append(temp_dir) @@ -282,7 +285,9 @@ def test_study_prep_get_req_failed_EBI(self): } metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', dtype=str) - qdb.metadata_template.sample_template.SampleTemplate.create( + npt.assert_warns( + qdb.exceptions.QiitaDBWarning, + qdb.metadata_template.sample_template.SampleTemplate.create, metadata, study) # (C) @@ -334,6 +339,8 @@ def test_study_prep_get_req_failed_EBI(self): 'status': 'success'} self.assertEqual(obs, exp) + qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) + def test_study_prep_get_req_no_access(self): obs = study_prep_get_req(1, 'demo@microbio.me') exp = {'status': 'error', @@ -409,6 +416,7 @@ def test_study_files_get_req(self): 'Cannabis Soils (1) - Raw data 1 (1)')]} self.assertEqual(obs, exp) + # adding a new study for further testing info = { "timeseries_type_id": 1, "metadata_complete": True, @@ -422,58 +430,103 @@ def test_study_files_get_req(self): "principal_investigator_id": qdb.study.StudyPerson(3), "lab_person_id": qdb.study.StudyPerson(1) } - new_study = qdb.study.Study.create( qdb.user.User('test@foo.bar'), "Some New Study to get files", [1], info) - obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ') - exp = {'status': 'success', - 'message': '', - 'remaining': [], - 'file_types': [('raw_barcodes', True, []), - ('raw_forward_seqs', True, []), - ('raw_reverse_seqs', False, [])], - 'num_prefixes': 1, - 'artifacts': [(1, 'Identification of the Microbiomes for ' - 'Cannabis Soils (1) - Raw data 1 (1)')]} + # check that you can't call a this function using two unrelated + # study_id and prep_template_id + with self.assertRaises(IncompetentQiitaDeveloperError): + study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ') + + def test_study_files_get_req_multiple(self): + study_id = 1 + # adding a new prep for testing + PREP = qdb.metadata_template.prep_template.PrepTemplate + prep_info_dict = { + 'SKB7.640196': {'run_prefix': 'test_1'}, + 'SKB8.640193': {'run_prefix': 'test_2'} + } + prep_info = pd.DataFrame.from_dict(prep_info_dict, + orient='index', dtype=str) + pt = npt.assert_warns( + qdb.exceptions.QiitaDBWarning, PREP.create, prep_info, + qdb.study.Study(study_id), "Metagenomic") + + # getting the upload folder so we can test + study_upload_dir = join( + qdb.util.get_mountpoint("uploads")[0][1], str(study_id)) + + # adding just foward per sample FASTQ to the upload folder + filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz'] + for f in filenames: + fpt = join(study_upload_dir, f) + open(fpt, 'w', 0).close() + self._clean_up_files.append(fpt) + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + exp = { + 'status': 'success', 'num_prefixes': 2, 'artifacts': [], + 'remaining': ['uploaded_file.txt'], 'message': '', + 'file_types': [ + ('raw_forward_seqs', True, + ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), + ('raw_reverse_seqs', False, [])]} self.assertEqual(obs, exp) - obs = study_files_get_req('admin@foo.bar', new_study.id, 1, 'FASTQ') - exp = {'status': 'success', - 'message': '', - 'remaining': [], - 'file_types': [('raw_barcodes', True, []), - ('raw_forward_seqs', True, []), - ('raw_reverse_seqs', False, [])], - 'num_prefixes': 1, - 'artifacts': []} + # let's add reverse + filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz'] + for f in filenames: + fpt = join(study_upload_dir, f) + open(fpt, 'w', 0).close() + self._clean_up_files.append(fpt) + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], + 'remaining': ['uploaded_file.txt'], 'message': '', + 'file_types': [('raw_forward_seqs', True, + ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), + ('raw_reverse_seqs', False, + ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]} self.assertEqual(obs, exp) - # Create some 'sff' files - upload_dir = qdb.util.get_mountpoint("uploads")[0][1] - study_upload_dir = join(upload_dir, str(new_study.id)) - fps = [] - - for i in range(2): - if not exists(study_upload_dir): - mkdir(study_upload_dir) - fd, fp = mkstemp(suffix=".sff", dir=study_upload_dir) - close(fd) - with open(fp, 'w') as f: - f.write('\n') - fps.append(fp) - - self._clean_up_files.extend(fps) + # let's an extra file that matches + filenames = ['test_1.R3.fastq.gz'] + for f in filenames: + fpt = join(study_upload_dir, f) + open(fpt, 'w', 0).close() + self._clean_up_files.append(fpt) + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], + 'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz', + 'test_1.R3.fastq.gz', 'uploaded_file.txt'], + 'message': '', + 'file_types': [('raw_forward_seqs', True, + ['test_2.R1.fastq.gz']), + ('raw_reverse_seqs', False, + ['test_2.R2.fastq.gz'])]} + self.assertEqual(obs, exp) - obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'SFF') - exp = {'status': 'success', + # now if we select FASTQ we have 3 columns so the extra file should go + # to the 3rd column + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'FASTQ') + exp = {'status': 'success', 'num_prefixes': 2, + 'remaining': ['uploaded_file.txt'], 'message': '', - 'remaining': [basename(fpath) for fpath in sorted(fps)], - 'file_types': [('raw_sff', True, [])], - 'num_prefixes': 0, - 'artifacts': []} + 'artifacts': [(1, 'Identification of the Microbiomes for ' + 'Cannabis Soils (1) - Raw data 1 (1)')], + 'file_types': [ + ('raw_barcodes', True, + ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), + ('raw_forward_seqs', True, + ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz']), + ('raw_reverse_seqs', False, ['test_1.R3.fastq.gz'])]} self.assertEqual(obs, exp) + PREP.delete(pt.id) + + if __name__ == '__main__': main() diff --git a/qiita_pet/templates/study_ajax/add_artifact.html b/qiita_pet/templates/study_ajax/add_artifact.html index 22e3a3640..e0fec17a3 100644 --- a/qiita_pet/templates/study_ajax/add_artifact.html +++ b/qiita_pet/templates/study_ajax/add_artifact.html @@ -90,9 +90,6 @@

No files attached to this preparation

-
- Name: -
Select type:
+
+ Add a name for the file: +
diff --git a/qiita_pet/templates/study_ajax/add_prep_artifact.html b/qiita_pet/templates/study_ajax/add_prep_artifact.html deleted file mode 100644 index 86a7e08a4..000000000 --- a/qiita_pet/templates/study_ajax/add_prep_artifact.html +++ /dev/null @@ -1,191 +0,0 @@ -{% from qiita_core.qiita_settings import qiita_config %} - - - - -
- -
-
-

Preperation Name

- -
-
-
-
-

Select data type

-

-

-
-
-

Select Investigation Type

-

-

-

User defined investigation type: -

-

New user defined term: -

-
-
-

Select file type

- -
-
-

Select Prep Information

- -
-
- - -
diff --git a/qiita_pet/templates/study_ajax/artifact_file_selector.html b/qiita_pet/templates/study_ajax/artifact_file_selector.html index 059ef26f9..5f3c8e1ce 100644 --- a/qiita_pet/templates/study_ajax/artifact_file_selector.html +++ b/qiita_pet/templates/study_ajax/artifact_file_selector.html @@ -189,7 +189,7 @@
- Import files from other studies: + Now, you can import files from other studies