diff --git a/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences.fastq.gz b/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences.fastq.gz new file mode 100644 index 000000000..76cb17801 Binary files /dev/null and b/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences.fastq.gz differ diff --git a/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz b/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz new file mode 100644 index 000000000..76cb17801 Binary files /dev/null and b/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz differ diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py index a676a35b8..8e211d3fc 100644 --- a/qiita_pet/handlers/api_proxy/studies.py +++ b/qiita_pet/handlers/api_proxy/studies.py @@ -100,6 +100,10 @@ def study_get_req(study_id, user_id): samples = study.sample_template study_info['num_samples'] = 0 if samples is None else len(list(samples)) study_info['owner'] = study.owner.id + # Study.has_access no_public=True, will return True only if the user_id is + # the owner of the study or if the study is shared with the user_id + study_info['has_access_to_raw_data'] = study.has_access( + User(user_id), True) return {'status': 'success', 'message': '', diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py index a838a2413..b20da31b8 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_studies.py +++ b/qiita_pet/handlers/api_proxy/tests/test_studies.py @@ -94,9 +94,9 @@ def test_study_get_req(self): 'number_samples_collected': 27, 'owner': 'test@foo.bar', 'ebi_submission_status': 'submitted', + 'has_access_to_raw_data': True, 'ebi_study_accession': 'EBI123456-BB'}, 'editable': True} - self.assertEqual(obs, exp) # Test with no lab person diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py index 5b766a23a..2b797bfdb 100644 --- a/qiita_pet/handlers/download.py +++ b/qiita_pet/handlers/download.py @@ -57,21 +57,13 @@ def get(self, study_id): str(study_id))) study = Study(study_id) - user = self.current_user basedir = get_db_files_base_dir() basedir_len = len(basedir) + 1 # loop over artifacts and retrieve those that we have access to to_download = [] - vfabu = validate_filepath_access_by_user for a in study.artifacts(): if a.artifact_type == 'BIOM': - to_add = True for i, (fid, path, data_type) in enumerate(a.filepaths): - # validate access only of the first artifact filepath, - # the rest have the same permissions - if (i == 0 and not vfabu(user, fid)): - to_add = False - break # ignore if tgz as they could create problems and the # raw data is in the folder if data_type == 'tgz': @@ -97,16 +89,15 @@ def get(self, study_id): # how to trigger it to_download.append((path, path, path)) - if to_add: - for pt in a.prep_templates: - qmf = pt.qiime_map_fp - if qmf is not None: - sqmf = qmf - if qmf.startswith(basedir): - sqmf = qmf[basedir_len:] - to_download.append( - (qmf, sqmf, 'mapping_files/%s_mapping_file.txt' - % a.id)) + for pt in a.prep_templates: + qmf = pt.qiime_map_fp + if qmf is not None: + sqmf = qmf + if qmf.startswith(basedir): + sqmf = qmf[basedir_len:] + to_download.append( + (qmf, sqmf, 'mapping_files/%s_mapping_file.txt' + % a.id)) # If we don't have nginx, write a file that indicates this all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n) @@ -130,6 +121,8 @@ def get(self, extras): _, relpath, _ = get_release_info() # If we don't have nginx, write a file that indicates this + # Note that this configuration will automatically create and download + # ("on the fly") the zip file via the contents in all_files self.write("This installation of Qiita was not equipped with nginx, " "so it is incapable of serving files. The file you " "attempted to download is located at %s" % relpath) @@ -143,5 +136,81 @@ def get(self, extras): '/protected-working_dir/' + relpath) self.set_header('Content-Disposition', 'attachment; filename=%s' % basename(relpath)) + self.finish() + + +class DownloadRawData(BaseHandler): + @authenticated + @execute_as_transaction + def get(self, study_id): + study_id = int(study_id) + # Check general access to study + study_info = study_get_req(study_id, self.current_user.id) + if study_info['status'] != 'success': + raise HTTPError(405, "%s: %s, %s" % (study_info['message'], + self.current_user.email, + str(study_id))) + + study = Study(study_id) + user = self.current_user + # Check "owner" access to the study + if not study.has_access(user, True): + raise HTTPError(405, "%s: %s, %s" % ('No raw data access', + self.current_user.email, + str(study_id))) + + basedir = get_db_files_base_dir() + basedir_len = len(basedir) + 1 + # loop over artifacts and retrieve raw data (no parents) + to_download = [] + for a in study.artifacts(): + if not a.parents: + for i, (fid, path, data_type) in enumerate(a.filepaths): + if data_type == 'directory': + # If we have a directory, we actually need to list + # all the files from the directory so NGINX can + # actually download all of them + for dp, _, fps in walk(path): + for fname in fps: + fullpath = join(dp, fname) + spath = fullpath + if fullpath.startswith(basedir): + spath = fullpath[basedir_len:] + to_download.append((fullpath, spath, spath)) + elif path.startswith(basedir): + spath = path[basedir_len:] + to_download.append((path, spath, spath)) + else: + # We are not aware of any case that can trigger this + # situation, but we wanted to be overly cautious + # There is no test for this line cause we don't know + # how to trigger it + to_download.append((path, path, path)) + for pt in a.prep_templates: + qmf = pt.qiime_map_fp + if qmf is not None: + sqmf = qmf + if qmf.startswith(basedir): + sqmf = qmf[basedir_len:] + to_download.append( + (qmf, sqmf, 'mapping_files/%s_mapping_file.txt' + % a.id)) + + # If we don't have nginx, write a file that indicates this + # Note that this configuration will automatically create and download + # ("on the fly") the zip file via the contents in all_files + all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n) + for fp, sfp, n in to_download]) + self.write("%s\n" % all_files) + + zip_fn = 'study_raw_data_%d_%s.zip' % ( + study_id, datetime.now().strftime('%m%d%y-%H%M%S')) + + self.set_header('Content-Description', 'File Transfer') + self.set_header('Expires', '0') + self.set_header('Cache-Control', 'no-cache') + self.set_header('X-Archive-Files', 'zip') + self.set_header('Content-Disposition', + 'attachment; filename=%s' % zip_fn) self.finish() diff --git a/qiita_pet/templates/study_base.html b/qiita_pet/templates/study_base.html index 20a44ba94..3657b0449 100644 --- a/qiita_pet/templates/study_base.html +++ b/qiita_pet/templates/study_base.html @@ -240,6 +240,9 @@ {% end %} All QIIME maps and BIOMs + {% if study_info['has_access_to_raw_data'] %} + All raw data + {% end %}
Issues opening the downloaded zip?
diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py index 34c81136b..95e254c77 100644 --- a/qiita_pet/test/test_download.py +++ b/qiita_pet/test/test_download.py @@ -19,6 +19,7 @@ from qiita_pet.test.tornado_test_base import TestHandlerBase from qiita_pet.handlers.base_handlers import BaseHandler from qiita_db.user import User +from qiita_db.study import Study from qiita_db.artifact import Artifact from qiita_db.software import Parameters, Command @@ -77,8 +78,6 @@ def test_download_study(self): with open(tgz, 'w') as f: f.write('\n') - self._clean_up_files.append(tmp_dir) - files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')] params = Parameters.from_default_params( @@ -156,5 +155,53 @@ def test_download(self): "is located at", response.body) +class TestDownloadRawData(TestHandlerBase): + + def setUp(self): + super(TestDownloadRawData, self).setUp() + self._clean_up_files = [] + + def tearDown(self): + super(TestDownloadRawData, self).tearDown() + for fp in self._clean_up_files: + if exists(fp): + if isdir(fp): + rmtree(fp) + else: + remove(fp) + + def test_download_raw_data(self): + # it's possible that one of the tests is deleting the raw data + # so we will make sure that the files exists so this test passes + all_files = [fp for a in Study(1).artifacts() + for _, fp, _ in a.filepaths] + for fp in all_files: + if not exists(fp): + with open(fp, 'w') as f: + f.write('') + response = self.get('/download_raw_data/1') + self.assertEqual(response.code, 200) + + exp = ( + '- 0 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz ' + 'raw_data/1_s_G1_L001_sequences.fastq.gz\n' + '- 0 /protected/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz ' + 'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n' + '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt ' + 'mapping_files/1_mapping_file.txt\n' + '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt ' + 'mapping_files/7_mapping_file.txt\n') + self.assertRegexpMatches(response.body, exp) + + response = self.get('/download_study_bioms/200') + self.assertEqual(response.code, 405) + + # changing user so we can test the failures + BaseHandler.get_current_user = Mock( + return_value=User("demo@microbio.me")) + response = self.get('/download_study_bioms/1') + self.assertEqual(response.code, 405) + + if __name__ == '__main__': main() diff --git a/qiita_pet/webserver.py b/qiita_pet/webserver.py index d62d4db43..f73d6c05f 100644 --- a/qiita_pet/webserver.py +++ b/qiita_pet/webserver.py @@ -39,7 +39,8 @@ from qiita_pet.handlers.upload import UploadFileHandler, StudyUploadFileHandler from qiita_pet.handlers.stats import StatsHandler from qiita_pet.handlers.download import ( - DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease) + DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease, + DownloadRawData) from qiita_pet.handlers.prep_template import PrepTemplateHandler from qiita_pet.handlers.ontology import OntologyHandler from qiita_db.handlers.processing_job import ( @@ -150,6 +151,7 @@ def __init__(self): (r"/download/(.*)", DownloadHandler), (r"/download_study_bioms/(.*)", DownloadStudyBIOMSHandler), (r"/release/download/(.*)", DownloadRelease), + (r"/download_raw_data/(.*)", DownloadRawData), (r"/vamps/(.*)", VAMPSHandler), # Plugin handlers - the order matters here so do not change # qiita_db/jobs/(.*) should go after any of the