From 19a9ddadcd784a8f86239968cb48eceb74de9134 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Fri, 27 Jan 2017 09:39:52 -0700 Subject: [PATCH 1/5] fix #1837 --- qiita_pet/handlers/api_proxy/studies.py | 50 +++-- .../handlers/api_proxy/tests/test_studies.py | 124 +++++++----- .../study_ajax/add_prep_artifact.html | 191 ------------------ 3 files changed, 112 insertions(+), 253 deletions(-) delete mode 100644 qiita_pet/templates/study_ajax/add_prep_artifact.html diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py index 0cae2abb1..7ac9430f3 100644 --- a/qiita_pet/handlers/api_proxy/studies.py +++ b/qiita_pet/handlers/api_proxy/studies.py @@ -16,6 +16,7 @@ from qiita_db.util import (supported_filepath_types, get_files_from_uploads_folders) from qiita_pet.handlers.api_proxy.util import check_access +from qiita_core.exceptions import IncompetentQiitaDeveloperError def data_types_get_req(): @@ -198,7 +199,7 @@ def study_prep_get_req(study_id, user_id): def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): """Returns the uploaded files for the study id categorized by artifact_type - It retrieves the files uploaded for the given study and tries to do a + It retrieves the files uploaded for the given study and tries to guess on how those files should be added to the artifact of the given type. Uses information on the prep template to try to do a better guess. @@ -234,31 +235,48 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): remaining = [] uploaded = get_files_from_uploads_folders(study_id) - pt = PrepTemplate(prep_template_id).to_dataframe() + pt = PrepTemplate(prep_template_id) + if pt.study_id != study_id: + raise IncompetentQiitaDeveloperError( + "The requested prep id (%d) doesn't belong to the study " + "(%d)" % (pt.study_id, study_id)) + + pt = pt.to_dataframe() ftypes_if = (ft.startswith('raw_') for ft, _ in supp_file_types if ft != 'raw_sff') if any(ftypes_if) and 'run_prefix' in pt.columns: prep_prefixes = tuple(set(pt['run_prefix'])) num_prefixes = len(prep_prefixes) - for _, filename in uploaded: - if filename.startswith(prep_prefixes): - selected.append(filename) - else: - remaining.append(filename) + # special case for per_sample_FASTQ + if artifact_type == 'per_sample_FASTQ': + # sorting prefixes by length to avoid collisions like: 100 1002 + # 10003 + prep_prefixes = sorted(prep_prefixes, key=len, reverse=True) + # group files by prefix + sfiles = {p: [f for _, f in uploaded if f.startswith(p)] + for p in prep_prefixes} + for k, v in viewitems(sfiles): + len_files = len(v) + if len_files != 1 and len_files != 2: + remaining.extend(v) + else: + v.sort() + selected.append(v) + else: + len_files = 1 + for _, filename in uploaded: + if filename.startswith(prep_prefixes): + selected.append(filename) + else: + remaining.append(filename) else: num_prefixes = 0 remaining = [f for _, f in uploaded] - # At this point we can't do anything smart about selecting by default - # the files for each type. The only thing that we can do is assume that - # the first in the supp_file_types list is the default one where files - # should be added in case of 'run_prefix' being present - file_types = [(fp_type, req, []) for fp_type, req in supp_file_types[1:]] - first = supp_file_types[0] - # Note that this works even if `run_prefix` is not in the prep template - # because selected is initialized to the empty list - file_types.insert(0, (first[0], first[1], selected)) + # get file_types, format: filetype, required, list of files + file_types = [(t, req, [x[i] for x in selected if i+1 <= len_files]) + for i, (t, req) in enumerate(supp_file_types)] # Create a list of artifacts that the user has access to, in case that # he wants to import the files from another artifact diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py index 0a0e427a0..345e0ddb1 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_studies.py +++ b/qiita_pet/handlers/api_proxy/tests/test_studies.py @@ -7,15 +7,16 @@ # ----------------------------------------------------------------------------- from unittest import TestCase, main from datetime import datetime -from os.path import exists, join, basename, isdir -from os import remove, close, mkdir +from os.path import exists, join, isdir +from os import remove from shutil import rmtree -from tempfile import mkstemp, mkdtemp +from tempfile import mkdtemp import pandas as pd import numpy.testing as npt from qiita_core.util import qiita_test_checker +from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb from qiita_pet.handlers.api_proxy.studies import ( data_types_get_req, study_get_req, study_prep_get_req, study_delete_req, @@ -282,7 +283,9 @@ def test_study_prep_get_req_failed_EBI(self): } metadata = pd.DataFrame.from_dict(metadata_dict, orient='index', dtype=str) - qdb.metadata_template.sample_template.SampleTemplate.create( + npt.assert_warns( + qdb.exceptions.QiitaDBWarning, + qdb.metadata_template.sample_template.SampleTemplate.create, metadata, study) # (C) @@ -409,6 +412,7 @@ def test_study_files_get_req(self): 'Cannabis Soils (1) - Raw data 1 (1)')]} self.assertEqual(obs, exp) + # adding a new study for further testing info = { "timeseries_type_id": 1, "metadata_complete": True, @@ -422,58 +426,86 @@ def test_study_files_get_req(self): "principal_investigator_id": qdb.study.StudyPerson(3), "lab_person_id": qdb.study.StudyPerson(1) } - new_study = qdb.study.Study.create( qdb.user.User('test@foo.bar'), "Some New Study to get files", [1], info) - obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ') - exp = {'status': 'success', - 'message': '', - 'remaining': [], - 'file_types': [('raw_barcodes', True, []), - ('raw_forward_seqs', True, []), - ('raw_reverse_seqs', False, [])], - 'num_prefixes': 1, - 'artifacts': [(1, 'Identification of the Microbiomes for ' - 'Cannabis Soils (1) - Raw data 1 (1)')]} + # check that you can't call a this function using two unrelated + # study_id and prep_template_id + with self.assertRaises(IncompetentQiitaDeveloperError): + study_files_get_req('test@foo.bar', new_study.id, 1, 'FASTQ') + + def test_study_files_get_req_per_sample_FASTQ(self): + study_id = 1 + # adding a new prep for testing + PREP = qdb.metadata_template.prep_template.PrepTemplate + prep_info_dict = { + 'SKB7.640196': {'run_prefix': 'test_1'}, + 'SKB8.640193': {'run_prefix': 'test_2'} + } + prep_info = pd.DataFrame.from_dict(prep_info_dict, + orient='index', dtype=str) + pt = npt.assert_warns( + qdb.exceptions.QiitaDBWarning, PREP.create, prep_info, + qdb.study.Study(study_id), "Metagenomic") + + # getting the upload folder so we can test + study_upload_dir = join( + qdb.util.get_mountpoint("uploads")[0][1], str(study_id)) + + # adding just foward per sample FASTQ to the upload folder + filenames = ['test_1.R1.fastq.gz', 'test_2.R1.fastq.gz'] + for f in filenames: + fpt = join(study_upload_dir, f) + open(fpt, 'w', 0).close() + self._clean_up_files.append(fpt) + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + exp = { + 'status': 'success', 'num_prefixes': 2, 'artifacts': [], + 'remaining': [], 'message': '', + 'file_types': [ + ('raw_forward_seqs', True, + ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), + ('raw_reverse_seqs', False, [])]} self.assertEqual(obs, exp) - obs = study_files_get_req('admin@foo.bar', new_study.id, 1, 'FASTQ') - exp = {'status': 'success', - 'message': '', - 'remaining': [], - 'file_types': [('raw_barcodes', True, []), - ('raw_forward_seqs', True, []), - ('raw_reverse_seqs', False, [])], - 'num_prefixes': 1, - 'artifacts': []} + # let's add reverse + filenames = ['test_1.R2.fastq.gz', 'test_2.R2.fastq.gz'] + for f in filenames: + fpt = join(study_upload_dir, f) + open(fpt, 'w', 0).close() + self._clean_up_files.append(fpt) + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], + 'remaining': [], 'message': '', + 'file_types': [('raw_forward_seqs', True, + ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), + ('raw_reverse_seqs', False, + ['test_2.R2.fastq.gz', 'test_1.R2.fastq.gz'])]} self.assertEqual(obs, exp) - # Create some 'sff' files - upload_dir = qdb.util.get_mountpoint("uploads")[0][1] - study_upload_dir = join(upload_dir, str(new_study.id)) - fps = [] - - for i in range(2): - if not exists(study_upload_dir): - mkdir(study_upload_dir) - fd, fp = mkstemp(suffix=".sff", dir=study_upload_dir) - close(fd) - with open(fp, 'w') as f: - f.write('\n') - fps.append(fp) - - self._clean_up_files.extend(fps) - - obs = study_files_get_req('test@foo.bar', new_study.id, 1, 'SFF') - exp = {'status': 'success', + # let's an extra file that matches + filenames = ['test_1.R3.fastq.gz'] + for f in filenames: + fpt = join(study_upload_dir, f) + open(fpt, 'w', 0).close() + self._clean_up_files.append(fpt) + obs = study_files_get_req( + 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') + exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], + 'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz', + 'test_1.R3.fastq.gz'], 'message': '', - 'remaining': [basename(fpath) for fpath in sorted(fps)], - 'file_types': [('raw_sff', True, [])], - 'num_prefixes': 0, - 'artifacts': []} + 'file_types': [('raw_forward_seqs', True, + ['test_2.R1.fastq.gz']), + ('raw_reverse_seqs', False, + ['test_2.R2.fastq.gz'])]} self.assertEqual(obs, exp) + PREP.delete(pt.id) + + if __name__ == '__main__': main() diff --git a/qiita_pet/templates/study_ajax/add_prep_artifact.html b/qiita_pet/templates/study_ajax/add_prep_artifact.html deleted file mode 100644 index 86a7e08a4..000000000 --- a/qiita_pet/templates/study_ajax/add_prep_artifact.html +++ /dev/null @@ -1,191 +0,0 @@ -{% from qiita_core.qiita_settings import qiita_config %} - - - - -
-
-
-
-

Preperation Name

- -
-
-
-
-

Select data type

-

-

-
-
-

Select Investigation Type

-

-

-

User defined investigation type: -

-

New user defined term: -

-
-
-

Select file type

- -
-
-

Select Prep Information

- -
-
- - -
From 6f0dd71f4e5ab5fe007c95b2ac567b7c3caccfe4 Mon Sep 17 00:00:00 2001 From: Antonio Gonzalez Date: Sat, 28 Jan 2017 07:05:14 -0700 Subject: [PATCH 2/5] generalizing this functionality --- qiita_pet/handlers/api_proxy/studies.py | 40 ++++++++----------- .../handlers/api_proxy/tests/test_studies.py | 10 +++-- .../templates/study_ajax/add_artifact.html | 6 +-- .../study_ajax/artifact_file_selector.html | 9 ++++- 4 files changed, 34 insertions(+), 31 deletions(-) diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py index 7ac9430f3..e92c6e854 100644 --- a/qiita_pet/handlers/api_proxy/studies.py +++ b/qiita_pet/handlers/api_proxy/studies.py @@ -248,34 +248,28 @@ def study_files_get_req(user_id, study_id, prep_template_id, artifact_type): if any(ftypes_if) and 'run_prefix' in pt.columns: prep_prefixes = tuple(set(pt['run_prefix'])) num_prefixes = len(prep_prefixes) - # special case for per_sample_FASTQ - if artifact_type == 'per_sample_FASTQ': - # sorting prefixes by length to avoid collisions like: 100 1002 - # 10003 - prep_prefixes = sorted(prep_prefixes, key=len, reverse=True) - # group files by prefix - sfiles = {p: [f for _, f in uploaded if f.startswith(p)] - for p in prep_prefixes} - for k, v in viewitems(sfiles): - len_files = len(v) - if len_files != 1 and len_files != 2: - remaining.extend(v) - else: - v.sort() - selected.append(v) - else: - len_files = 1 - for _, filename in uploaded: - if filename.startswith(prep_prefixes): - selected.append(filename) - else: - remaining.append(filename) + # sorting prefixes by length to avoid collisions like: 100 1002 + # 10003 + prep_prefixes = sorted(prep_prefixes, key=len, reverse=True) + # group files by prefix + sfiles = {p: [f for _, f in uploaded if f.startswith(p)] + for p in prep_prefixes} + inuse = [y for x in sfiles.values() for y in x] + remaining.extend([f for _, f in uploaded if f not in inuse]) + + for k, v in viewitems(sfiles): + len_files = len(v) + if len_files != 1 and len_files != 2: + remaining.extend(v) + else: + v.sort() + selected.append(v) else: num_prefixes = 0 remaining = [f for _, f in uploaded] # get file_types, format: filetype, required, list of files - file_types = [(t, req, [x[i] for x in selected if i+1 <= len_files]) + file_types = [(t, req, [x[i] for x in selected if i+1 <= len(x)]) for i, (t, req) in enumerate(supp_file_types)] # Create a list of artifacts that the user has access to, in case that diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py index 345e0ddb1..494c580a7 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_studies.py +++ b/qiita_pet/handlers/api_proxy/tests/test_studies.py @@ -238,6 +238,8 @@ def test_study_prep_get_req(self): for i in range(4, 0, -1): qdb.artifact.Artifact(i).visibility = "private" + qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) + def test_study_prep_get_req_failed_EBI(self): temp_dir = mkdtemp() self._clean_up_files.append(temp_dir) @@ -337,6 +339,8 @@ def test_study_prep_get_req_failed_EBI(self): 'status': 'success'} self.assertEqual(obs, exp) + qdb.metadata_template.prep_template.PrepTemplate.delete(pt.id) + def test_study_prep_get_req_no_access(self): obs = study_prep_get_req(1, 'demo@microbio.me') exp = {'status': 'error', @@ -463,7 +467,7 @@ def test_study_files_get_req_per_sample_FASTQ(self): 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') exp = { 'status': 'success', 'num_prefixes': 2, 'artifacts': [], - 'remaining': [], 'message': '', + 'remaining': ['uploaded_file.txt'], 'message': '', 'file_types': [ ('raw_forward_seqs', True, ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), @@ -479,7 +483,7 @@ def test_study_files_get_req_per_sample_FASTQ(self): obs = study_files_get_req( 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], - 'remaining': [], 'message': '', + 'remaining': ['uploaded_file.txt'], 'message': '', 'file_types': [('raw_forward_seqs', True, ['test_2.R1.fastq.gz', 'test_1.R1.fastq.gz']), ('raw_reverse_seqs', False, @@ -496,7 +500,7 @@ def test_study_files_get_req_per_sample_FASTQ(self): 'shared@foo.bar', 1, pt.id, 'per_sample_FASTQ') exp = {'status': 'success', 'num_prefixes': 2, 'artifacts': [], 'remaining': ['test_1.R1.fastq.gz', 'test_1.R2.fastq.gz', - 'test_1.R3.fastq.gz'], + 'test_1.R3.fastq.gz', 'uploaded_file.txt'], 'message': '', 'file_types': [('raw_forward_seqs', True, ['test_2.R1.fastq.gz']), diff --git a/qiita_pet/templates/study_ajax/add_artifact.html b/qiita_pet/templates/study_ajax/add_artifact.html index 22e3a3640..e0fec17a3 100644 --- a/qiita_pet/templates/study_ajax/add_artifact.html +++ b/qiita_pet/templates/study_ajax/add_artifact.html @@ -90,9 +90,6 @@

No files attached to this preparation

-
- Name: -
Select type:
+
+ Add a name for the file: +
diff --git a/qiita_pet/templates/study_ajax/artifact_file_selector.html b/qiita_pet/templates/study_ajax/artifact_file_selector.html index c341df246..ecbc3dc9d 100644 --- a/qiita_pet/templates/study_ajax/artifact_file_selector.html +++ b/qiita_pet/templates/study_ajax/artifact_file_selector.html @@ -183,7 +183,7 @@
- Import files from other studies: + Now, you can import files from other studies