Skip to content

Commit bef4640

Browse files
authored
Merge pull request #2104 from antgonza/download-raw-data
Download bulk raw data
2 parents 56a20b4 + 7d1ea83 commit bef4640

File tree

8 files changed

+147
-22
lines changed

8 files changed

+147
-22
lines changed
Binary file not shown.
Binary file not shown.

qiita_pet/handlers/api_proxy/studies.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ def study_get_req(study_id, user_id):
100100
samples = study.sample_template
101101
study_info['num_samples'] = 0 if samples is None else len(list(samples))
102102
study_info['owner'] = study.owner.id
103+
# Study.has_access with no_public=True returns True only if the user_id is
104+
# the owner of the study or if the study is shared with the user_id
105+
study_info['has_access_to_raw_data'] = study.has_access(
106+
User(user_id), True)
103107

104108
return {'status': 'success',
105109
'message': '',

qiita_pet/handlers/api_proxy/tests/test_studies.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ def test_study_get_req(self):
9494
'number_samples_collected': 27,
9595
'owner': 'test@foo.bar',
9696
'ebi_submission_status': 'submitted',
97+
'has_access_to_raw_data': True,
9798
'ebi_study_accession': 'EBI123456-BB'},
9899
'editable': True}
99-
100100
self.assertEqual(obs, exp)
101101

102102
# Test with no lab person

qiita_pet/handlers/download.py

Lines changed: 87 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -57,21 +57,13 @@ def get(self, study_id):
5757
str(study_id)))
5858

5959
study = Study(study_id)
60-
user = self.current_user
6160
basedir = get_db_files_base_dir()
6261
basedir_len = len(basedir) + 1
6362
# loop over artifacts and retrieve those that we have access to
6463
to_download = []
65-
vfabu = validate_filepath_access_by_user
6664
for a in study.artifacts():
6765
if a.artifact_type == 'BIOM':
68-
to_add = True
6966
for i, (fid, path, data_type) in enumerate(a.filepaths):
70-
# validate access only of the first artifact filepath,
71-
# the rest have the same permissions
72-
if (i == 0 and not vfabu(user, fid)):
73-
to_add = False
74-
break
7567
# ignore if tgz as they could create problems and the
7668
# raw data is in the folder
7769
if data_type == 'tgz':
@@ -97,16 +89,15 @@ def get(self, study_id):
9789
# how to trigger it
9890
to_download.append((path, path, path))
9991

100-
if to_add:
101-
for pt in a.prep_templates:
102-
qmf = pt.qiime_map_fp
103-
if qmf is not None:
104-
sqmf = qmf
105-
if qmf.startswith(basedir):
106-
sqmf = qmf[basedir_len:]
107-
to_download.append(
108-
(qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
109-
% a.id))
92+
for pt in a.prep_templates:
93+
qmf = pt.qiime_map_fp
94+
if qmf is not None:
95+
sqmf = qmf
96+
if qmf.startswith(basedir):
97+
sqmf = qmf[basedir_len:]
98+
to_download.append(
99+
(qmf, sqmf, 'mapping_files/%s_mapping_file.txt'
100+
% a.id))
110101

111102
# If we don't have nginx, write a file that indicates this
112103
all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
@@ -130,6 +121,8 @@ def get(self, extras):
130121
_, relpath, _ = get_release_info()
131122

132123
# If we don't have nginx, write a file that indicates this
124+
# Note that this configuration will automatically create and download
125+
# ("on the fly") the zip file via the contents in all_files
133126
self.write("This installation of Qiita was not equipped with nginx, "
134127
"so it is incapable of serving files. The file you "
135128
"attempted to download is located at %s" % relpath)
@@ -143,5 +136,81 @@ def get(self, extras):
143136
'/protected-working_dir/' + relpath)
144137
self.set_header('Content-Disposition',
145138
'attachment; filename=%s' % basename(relpath))
139+
self.finish()
140+
141+
142+
class DownloadRawData(BaseHandler):
    """Download all raw data (artifacts without parents) of a study.

    The response body is a manifest with one
    ``- <size> /protected/<path> <name in zip>`` line per file, sent with
    the ``X-Archive-Files: zip`` header so the zip file is created and
    downloaded "on the fly" from the contents of that manifest.
    """

    @staticmethod
    def _strip_basedir(path, basedir):
        """Return `path` relative to `basedir`

        Parameters
        ----------
        path : str
            The filepath to shorten
        basedir : str
            The base directory, without a trailing separator

        Returns
        -------
        str
            `path` without the leading `basedir` and the separator that
            follows it, or `path` unchanged if it doesn't start with
            `basedir`
        """
        if path.startswith(basedir):
            # + 1 to also drop the separator between basedir and the rest
            return path[len(basedir) + 1:]
        # We are not aware of any case that can trigger this situation,
        # but we wanted to be overly cautious. There is no test for this
        # line because we don't know how to trigger it
        return path

    @authenticated
    @execute_as_transaction
    def get(self, study_id):
        """Write the raw data manifest of the study and the zip headers

        Parameters
        ----------
        study_id : str
            The study id, as captured from the URL

        Raises
        ------
        HTTPError
            405 if the general study request does not succeed or if the
            current user has no raw data access to the study
        """
        study_id = int(study_id)
        user = self.current_user

        # Check general access to study
        study_info = study_get_req(study_id, user.id)
        if study_info['status'] != 'success':
            raise HTTPError(405, "%s: %s, %s" % (study_info['message'],
                                                 user.email,
                                                 str(study_id)))

        study = Study(study_id)
        # Check "owner" access to the study
        if not study.has_access(user, True):
            raise HTTPError(405, "%s: %s, %s" % ('No raw data access',
                                                 user.email,
                                                 str(study_id)))

        basedir = get_db_files_base_dir()
        # each entry is (path on disk, path under /protected, name in zip)
        to_download = []
        # loop over artifacts and retrieve raw data (no parents)
        for a in study.artifacts():
            if a.parents:
                continue

            for _, path, data_type in a.filepaths:
                if data_type == 'directory':
                    # If we have a directory, we actually need to list
                    # all the files from the directory so NGINX can
                    # actually download all of them
                    for dp, _dirs, fps in walk(path):
                        for fname in fps:
                            fullpath = join(dp, fname)
                            spath = self._strip_basedir(fullpath, basedir)
                            to_download.append((fullpath, spath, spath))
                else:
                    spath = self._strip_basedir(path, basedir)
                    to_download.append((path, spath, spath))

            for pt in a.prep_templates:
                qmf = pt.qiime_map_fp
                if qmf is not None:
                    to_download.append(
                        (qmf, self._strip_basedir(qmf, basedir),
                         'mapping_files/%s_mapping_file.txt' % a.id))

        # Write the manifest; note that this configuration will
        # automatically create and download ("on the fly") the zip file
        # via the contents in all_files
        all_files = '\n'.join(["- %s /protected/%s %s" % (getsize(fp), sfp, n)
                               for fp, sfp, n in to_download])
        self.write("%s\n" % all_files)

        zip_fn = 'study_raw_data_%d_%s.zip' % (
            study_id, datetime.now().strftime('%m%d%y-%H%M%S'))

        self.set_header('Content-Description', 'File Transfer')
        self.set_header('Expires', '0')
        self.set_header('Cache-Control', 'no-cache')
        self.set_header('X-Archive-Files', 'zip')
        self.set_header('Content-Disposition',
                        'attachment; filename=%s' % zip_fn)
        self.finish()

qiita_pet/templates/study_base.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@
240240
<button class="btn btn-default btn-block" onclick="populate_main_div('{% raw qiita_config.portal_dir %}/study/new_prep_template/', { study_id: {{study_info['study_id']}} })" id="add-new-preparation-btn"><span class="glyphicon glyphicon-plus-sign"></span> Add New Preparation</button>
241241
{% end %}
242242
<a class="btn btn-default btn-block" href="{% raw qiita_config.portal_dir %}/download_study_bioms/{{study_info['study_id']}}"><span class="glyphicon glyphicon-download-alt"></span> All QIIME maps and BIOMs</a>
243+
{% if study_info['has_access_to_raw_data'] %}
244+
<a class="btn btn-default btn-block" href="{% raw qiita_config.portal_dir %}/download_raw_data/{{study_info['study_id']}}"><span class="glyphicon glyphicon-download-alt"></span> All raw data</a>
245+
{% end %}
243246
<div style="text-align: center;"><small><a href="{% raw qiita_config.portal_dir %}/static/doc/html/faq.html#how-to-solve-unzip-errors">Issues opening the downloaded zip?</a></small></div>
244247

245248
<div id="data-types-menu"></div>

qiita_pet/test/test_download.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from qiita_pet.test.tornado_test_base import TestHandlerBase
2020
from qiita_pet.handlers.base_handlers import BaseHandler
2121
from qiita_db.user import User
22+
from qiita_db.study import Study
2223
from qiita_db.artifact import Artifact
2324
from qiita_db.software import Parameters, Command
2425

@@ -77,8 +78,6 @@ def test_download_study(self):
7778
with open(tgz, 'w') as f:
7879
f.write('\n')
7980

80-
self._clean_up_files.append(tmp_dir)
81-
8281
files_biom = [(biom_fp, 'biom'), (smr_dir, 'directory'), (tgz, 'tgz')]
8382

8483
params = Parameters.from_default_params(
@@ -156,5 +155,53 @@ def test_download(self):
156155
"is located at", response.body)
157156

158157

158+
class TestDownloadRawData(TestHandlerBase):
    """Tests for the ``/download_raw_data/<study_id>`` handler"""

    def setUp(self):
        super(TestDownloadRawData, self).setUp()
        # paths added to this list are removed in tearDown
        self._clean_up_files = []

    def tearDown(self):
        super(TestDownloadRawData, self).tearDown()
        for fp in self._clean_up_files:
            if exists(fp):
                if isdir(fp):
                    rmtree(fp)
                else:
                    remove(fp)

    def test_download_raw_data(self):
        # it's possible that one of the tests is deleting the raw data
        # so we will make sure that the files exist so this test passes
        all_files = [fp for a in Study(1).artifacts()
                     for _, fp, _ in a.filepaths]
        for fp in all_files:
            if not exists(fp):
                with open(fp, 'w') as f:
                    f.write('')
        response = self.get('/download_raw_data/1')
        self.assertEqual(response.code, 200)

        exp = (
            '- 0 /protected/raw_data/1_s_G1_L001_sequences.fastq.gz '
            'raw_data/1_s_G1_L001_sequences.fastq.gz\n'
            '- 0 /protected/raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz '
            'raw_data/1_s_G1_L001_sequences_barcodes.fastq.gz\n'
            '- 36615 /protected/templates/1_prep_1_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/1_mapping_file.txt\n'
            '- 36615 /protected/templates/1_prep_2_qiime_[0-9]*-[0-9]*.txt '
            'mapping_files/7_mapping_file.txt\n')
        self.assertRegexpMatches(response.body, exp)

        # the failure cases must hit the raw data endpoint, which is the
        # handler under test here, and not the BIOM download endpoint
        response = self.get('/download_raw_data/200')
        self.assertEqual(response.code, 405)

        # changing user so we can test the failures
        BaseHandler.get_current_user = Mock(
            return_value=User("demo@microbio.me"))
        response = self.get('/download_raw_data/1')
        self.assertEqual(response.code, 405)
204+
205+
159206
if __name__ == '__main__':
160207
main()

qiita_pet/webserver.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939
from qiita_pet.handlers.upload import UploadFileHandler, StudyUploadFileHandler
4040
from qiita_pet.handlers.stats import StatsHandler
4141
from qiita_pet.handlers.download import (
42-
DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease)
42+
DownloadHandler, DownloadStudyBIOMSHandler, DownloadRelease,
43+
DownloadRawData)
4344
from qiita_pet.handlers.prep_template import PrepTemplateHandler
4445
from qiita_pet.handlers.ontology import OntologyHandler
4546
from qiita_db.handlers.processing_job import (
@@ -150,6 +151,7 @@ def __init__(self):
150151
(r"/download/(.*)", DownloadHandler),
151152
(r"/download_study_bioms/(.*)", DownloadStudyBIOMSHandler),
152153
(r"/release/download/(.*)", DownloadRelease),
154+
(r"/download_raw_data/(.*)", DownloadRawData),
153155
(r"/vamps/(.*)", VAMPSHandler),
154156
# Plugin handlers - the order matters here so do not change
155157
# qiita_db/jobs/(.*) should go after any of the

0 commit comments

Comments
 (0)