qiita-spots · wasade · Apr 11, 2017 · Apr 6, 2017 · Apr 6, 2017 · Apr 6, 2017
diff --git a/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences.fastq.gz b/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences.fastq.gz
diff --git a/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz b/qiita_db/support_files/test_data/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz
diff --git a/qiita_pet/handlers/api_proxy/studies.py b/qiita_pet/handlers/api_proxy/studies.py
@@ -100,6 +100,10 @@ def study_get_req(study_id, user_id):
     samples = study.sample_template
     study_info['num_samples'] = 0 if samples is None else len(list(samples))
     study_info['owner'] = study.owner.id
+    # Study.has_access with no_public=True returns True only if user_id is
+    # the owner of the study or if the study is shared with user_id
+    study_info['has_access_to_raw_data'] = study.has_access(
+        User(user_id), True)
 
     return {'status': 'success',
             'message': '',

diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py
@@ -94,9 +94,9 @@ def test_study_get_req(self):
                 'number_samples_collected': 27,
                 'owner': 'test@foo.bar',
                 'ebi_submission_status': 'submitted',
+                'has_access_to_raw_data': True,
                 'ebi_study_accession': 'EBI123456-BB'},
             'editable': True}
-
         self.assertEqual(obs, exp)
 
         # Test with no lab person

diff --git a/qiita_pet/handlers/download.py b/qiita_pet/handlers/download.py
@@ -57,21 +57,13 @@ def get(self, study_id):
                                                  str(study_id)))
 
         study = Study(study_id)
-        user = self.current_user
         basedir = get_db_files_base_dir()
         basedir_len = len(basedir) + 1
         # loop over artifacts and retrieve those that we have access to
         to_download = []
-        vfabu = validate_filepath_access_by_user
         for a in study.artifacts():
             if a.artifact_type == 'BIOM':
-                to_add = True
                 for i, (fid, path, data_type) in enumerate(a.filepaths):
-                    # validate access only of the first artifact filepath,
-                    # the rest have the same permissions
-                    if (i == 0 and not vfabu(user, fid)):
-                        to_add = False
-                        break
                     # ignore if tgz as they could create problems and the
                     # raw data is in the folder
                     if data_type == 'tgz':
@@ -97,16 +89,15 @@ def get(self, study_id):
                         # how to trigger it
                         to_download.append((path, path, path))
 
-                if to_add:
-                    for pt in a.prep_templates:
-                        qmf = pt.qiime_map_fp
-                        if qmf is not None:
-                            sqmf = qmf
-                            if qmf.startswith(basedir):
-                                sqmf = qmf[basedir_len:]
-                            to_download.append(
-                                (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
-                                            % a.id))
+                for pt in a.prep_templates:
+                    qmf = pt.qiime_map_fp
+                    if qmf is not None:
+                        sqmf = qmf
+                        if qmf.startswith(basedir):
+                            sqmf = qmf[basedir_len:]
+                        to_download.append(
+                            (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
+                                        % a.id))
 
         # If we don't have nginx, write a file that indicates this
         all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
@@ -130,6 +121,8 @@ def get(self, extras):
         _, relpath, _ = get_release_info()
 
         # If we don't have nginx, write a file that indicates this
+        # Note that this configuration will automatically create and download
+        # ("on the fly") the zip file via the contents in all_files
         self.write("This installation of Qiita was not equipped with nginx, "
                    "so it is incapable of serving files. The file you "
                    "attempted to download is located at %s" % relpath)
@@ -143,5 +136,81 @@ def get(self, extras):
                         '/protected-working_dir/' + relpath)
         self.set_header('Content-Disposition',
                         'attachment; filename=%s' % basename(relpath))
+        self.finish()
+
+
+class DownloadRawData(BaseHandler):
+    @authenticated
+    @execute_as_transaction
+    def get(self, study_id):
+        """Write the manifest of raw data files that is used to zip them
+
+        Raises HTTPError 405 if the user has no raw data access to the study
+        """
+        study_id = int(study_id)
+        # Check general access to study
+        study_info = study_get_req(study_id, self.current_user.id)
+        if study_info['status'] != 'success':
+            raise HTTPError(405, "%s: %s, %s" % (study_info['message'],
+                                                 self.current_user.email,
+                                                 str(study_id)))
+
+        study = Study(study_id)
+        # Check "owner" access to the study (owner or shared with, not public)
+        if not study.has_access(self.current_user, True):
+            raise HTTPError(405, "%s: %s, %s" % ('No raw data access',
+                                                 self.current_user.email,
+                                                 str(study_id)))
+
+        basedir = get_db_files_base_dir()
+        basedir_len = len(basedir) + 1
+        # loop over artifacts and retrieve raw data (no parents)
+        to_download = []
+        for a in study.artifacts():
+            if not a.parents:
+                for _, path, data_type in a.filepaths:
+                    if data_type == 'directory':
+                        # list every file in the directory so NGINX can
+                        # download all of them
+                        for dp, _, fps in walk(path):
+                            for fname in fps:
+                                fullpath = join(dp, fname)
+                                spath = fullpath
+                                if fullpath.startswith(basedir):
+                                    spath = fullpath[basedir_len:]
+                                to_download.append((fullpath, spath, spath))
+                    elif path.startswith(basedir):
+                        spath = path[basedir_len:]
+                        to_download.append((path, spath, spath))
+                    else:
+                        # We don't know how to trigger this case (hence no
+                        # test for it) but we want to be overly cautious
+                        to_download.append((path, path, path))
 
+                for pt in a.prep_templates:
+                    qmf = pt.qiime_map_fp
+                    if qmf is not None:
+                        sqmf = qmf
+                        if qmf.startswith(basedir):
+                            sqmf = qmf[basedir_len:]
+                        to_download.append(
+                            (qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
+                                        % a.id))
+
+        # Manifest, one line per file: "- <size> /protected/<path> <name>"
+        # Note that this configuration will automatically create and download
+        # ("on the fly") the zip file via the contents in all_files
+        all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
+                               for fp, sfp, n in to_download])
+        self.write("%s\n" % all_files)
+
+        zip_fn = 'study_raw_data_%d_%s.zip' % (
+            study_id, datetime.now().strftime('%m%d%y-%H%M%S'))
+
+        self.set_header('Content-Description', 'File Transfer')
+        self.set_header('Expires', '0')
+        self.set_header('Cache-Control', 'no-cache')
+        self.set_header('X-Archive-Files', 'zip')
+        self.set_header('Content-Disposition',
+                        'attachment; filename=%s' % zip_fn)
         self.finish()
diff --git a/qiita_pet/templates/study_base.html b/qiita_pet/templates/study_base.html
@@ -240,6 +240,9 @@
       <button class="btn btn-default btn-block" onclick="populate_main_div('{% raw qiita_config.portal_dir %}/study/new_prep_template/', { study_id: {{study_info['study_id']}} })" id="add-new-preparation-btn"><span class="glyphicon glyphicon-plus-sign"></span> Add New Preparation</button>
     {% end %}
     <a class="btn btn-default btn-block" href="{% raw qiita_config.portal_dir %}/download_study_bioms/{{study_info['study_id']}}"><span class="glyphicon glyphicon-download-alt"></span> All QIIME maps and BIOMs</a>
+    {% if study_info['has_access_to_raw_data'] %}
+      <a class="btn btn-default btn-block" href="{% raw qiita_config.portal_dir %}/download_raw_data/{{study_info['study_id']}}"><span class="glyphicon glyphicon-download-alt"></span> All raw data</a>
+    {% end %}
     <div style="text-align: center;"><small><a href="{% raw qiita_config.portal_dir %}/static/doc/html/faq.html#how-to-solve-unzip-errors">Issues opening the downloaded zip?</a></small></div>
 
     <div id="data-types-menu"></div>

diff --git a/qiita_pet/test/test_download.py b/qiita_pet/test/test_download.py
@@ -19,6 +19,7 @@
 from qiita_pet.test.tornado_test_base import TestHandlerBase
 from qiita_pet.handlers.base_handlers import BaseHandler
 from qiita_db.user import User
+from qiita_db.study import Study
 from qiita_db.artifact import Artifact
 from qiita_db.software import Parameters, Command
 
@@ -77,8 +78,6 @@ def test_download_study(self):
         with open(tgz, 'w') as f:
             f.write('\n')
 
-        self._clean_up_files.append(tmp_dir)
-
         files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]
 
         params = Parameters.from_default_params(
@@ -156,5 +155,53 @@ def test_download(self):
             "is located at", response.body)
 
 
+class TestDownloadRawData(TestHandlerBase):
+
+    def setUp(self):
+        super(TestDownloadRawData, self).setUp()
+        self._clean_up_files = []
+
+    def tearDown(self):
+        super(TestDownloadRawData, self).tearDown()
+        for fp in self._clean_up_files:
+            if exists(fp):
+                if isdir(fp):
+                    rmtree(fp)
+                else:
+                    remove(fp)
+
+    def test_download_raw_data(self):
+        # it's possible that one of the tests is deleting the raw data
+        # so we will make sure that the files exist so this test passes
+        all_files = [fp for a in Study(1).artifacts()
+                     for _, fp, _ in a.filepaths]
+        for fp in all_files:
+            if not exists(fp):
+                with open(fp, 'w') as f:
+                    f.write('')
+        response = self.get('/download_raw_data/1')
+        self.assertEqual(response.code, 200)
+
+        exp = (
+            '- 0 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
+            'raw_data/1_s_G1_L001_sequences.fastq.gz\n'
+            '- 0 /protected/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz '
+            'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n'
+            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt '
+            'mapping_files/1_mapping_file.txt\n'
+            '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
+            'mapping_files/7_mapping_file.txt\n')
+        self.assertRegexpMatches(response.body, exp)
+
+        response = self.get('/download_raw_data/200')
+        self.assertEqual(response.code, 405)
+
+        # changing user so we can test the raw data access failures
+        BaseHandler.get_current_user = Mock(
+            return_value=User("demo@microbio.me"))
+        response = self.get('/download_raw_data/1')
+        self.assertEqual(response.code, 405)
+
+
 if __name__ == '__main__':
     main()
diff --git a/qiita_pet/webserver.py b/qiita_pet/webserver.py
@@ -39,7 +39,8 @@
 from qiita_pet.handlers.upload import UploadFileHandler, StudyUploadFileHandler
 from qiita_pet.handlers.stats import StatsHandler
 from qiita_pet.handlers.download import (
-    DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease)
+    DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease,
+    DownloadRawData)
 from qiita_pet.handlers.prep_template import PrepTemplateHandler
 from qiita_pet.handlers.ontology import OntologyHandler
 from qiita_db.handlers.processing_job import (
@@ -150,6 +151,7 @@ def __init__(self):
             (r"/download/(.*)", DownloadHandler),
             (r"/download_study_bioms/(.*)", DownloadStudyBIOMSHandler),
             (r"/release/download/(.*)", DownloadRelease),
+            (r"/download_raw_data/(.*)", DownloadRawData),
             (r"/vamps/(.*)", VAMPSHandler),
             # Plugin handlers - the order matters here so do not change
             # qiita_db/jobs/(.*) should go after any of the