Skip to content

Commit 11f3657

Browse files
committed
fix #814, fix #1636
1 parent 131dd6a commit 11f3657

File tree

6 files changed

+313
-13
lines changed

6 files changed

+313
-13
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ script:
4040
- qiita-env start_cluster qiita-general
4141
- qiita-env make --no-load-ontologies
4242
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
43+
- if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
4344
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
4445
- if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi
45-
- flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install
46+
- flake8 qiita_* setup.py scripts/*
4647
- ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/
4748
- qiita pet webserver
4849
addons:

qiita_db/meta_util.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@
2525
# -----------------------------------------------------------------------------
2626
from __future__ import division
2727

28+
from moi import r_client
29+
from os import stat
30+
from time import strftime, localtime
31+
import matplotlib.pyplot as plt
32+
import matplotlib as mpl
33+
from base64 import b64encode
34+
from urllib import quote
35+
from StringIO import StringIO
36+
from future.utils import viewitems
37+
2838
from qiita_core.qiita_settings import qiita_config
2939
import qiita_db as qdb
3040

@@ -122,6 +132,147 @@ def get_accessible_filepath_ids(user):
122132
return filepath_ids
123133

124134

135+
def update_redis_stats():
    """Generate the system stats and save them in redis

    The following values are computed and stored in redis under the key
    ``<portal>:stats:<name>``:

    - number_studies: hash with the number of studies per status
    - number_of_samples: hash with the number of samples per status
    - num_users: the number of rows in qiita.qiita_user
    - lat_longs: the value returned by ``get_lat_longs``
    - num_studies_ebi: the number of entries in the per-study EBI counts
    - num_samples_ebi: the number of samples with an EBI experiment accession
    - img: an HTML ``<img>`` tag embedding a PNG plot with the cumulative
      storage used per data type over time

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study
    # NOTE: the 'sanbox' key is misspelled, but it is the field name that is
    # stored in redis, so renaming it would change what readers retrieve
    studies = {'public': STUDY.get_by_status('public'),
               'private': STUDY.get_by_status('private'),
               'sanbox': STUDY.get_by_status('sandbox')}
    number_studies = {k: len(v) for k, v in viewitems(studies)}

    number_of_samples = {}
    ebi_samples = {}
    for k, sts in viewitems(studies):
        number_of_samples[k] = 0
        for study in sts:
            st = study.sample_template
            if st is not None:
                number_of_samples[k] += len(list(st.keys()))

            # samples with a non-empty EBI experiment accession, added over
            # all the prep templates of the study
            # NOTE(review): studies with a count of 0 are also recorded, so
            # num_studies_ebi below counts every study - confirm if intended
            ebi_samples[study.id] = sum(
                1 for pt in study.prep_templates()
                for _, acc in viewitems(pt.ebi_experiment_accessions)
                if acc is not None and acc != '')

    num_users = qdb.util.get_count('qiita.qiita_user')

    lat_longs = get_lat_longs()

    num_studies_ebi = len(ebi_samples)
    num_samples_ebi = sum(ebi_samples.values())

    # generating file size stats: (data type, size, 'YYYY-MM' of st_ctime)
    stats = []
    missing_files = []
    for _, sts in viewitems(studies):
        for study in sts:
            for artifact in study.artifacts():
                for _, fp, dt in artifact.filepaths:
                    try:
                        fp_stat = stat(fp)
                    except OSError:
                        # the file can't be stat'ed: report it to the caller
                        missing_files.append(fp)
                    else:
                        stats.append(
                            (dt, fp_stat.st_size,
                             strftime('%Y-%m', localtime(fp_stat.st_ctime))))

    # summary[data type][year-month] -> total size of the files
    summary = {}
    all_dates = set()
    for ft, size, ym in stats:
        by_month = summary.setdefault(ft, {})
        by_month[ym] = by_month.get(ym, 0) + size
        all_dates.add(ym)
    all_dates = sorted(all_dates)

    # sorting summaries: cumulative size per data type, one value per date
    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                    'biom', 'raw_sff', 'raw_qual']
    ordered_summary = {}
    for dt in summary:
        if dt in rm_from_data:
            continue
        new_list = []
        current_value = 0
        for ad in all_dates:
            current_value += summary[dt].get(ad, 0)
            new_list.append(current_value)
        ordered_summary[dt] = new_list

    # data types are plotted in ascending order of their final total
    plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary],
                        key=lambda x: x[1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        # position is part of the FuncFormatter signature; it is not used
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                return "%3.1f%s" % (value, unit)
            value /= 1024.0
        return "%.1f%s" % (value, 'Yi')

    all_dates_axis = range(len(all_dates))
    # the figure is created before configuring the locator so the setting is
    # applied to the axes that are actually plotted and saved
    fig = plt.figure(figsize=(20, 10))
    try:
        plt.locator_params(axis='y', nbins=10)
        for k, _ in plot_order:
            plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

        plt.xticks(all_dates_axis, all_dates)
        plt.legend()
        plt.grid()
        ax = plt.gca()
        ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
        plt.xlabel('Date')
        plt.ylabel('Storage space per data type')

        plot = StringIO()
        plt.savefig(plot, format='png')
    finally:
        # pyplot keeps figures alive until they are explicitly closed
        plt.close(fig)
    img = '<img src = "%s"/>' % (
        'data:image/png;base64,' + quote(b64encode(plot.getvalue())))

    portal = qiita_config.portal
    # (stat name, redis setter, value): dicts are stored as hashes, the rest
    # as plain values
    to_store = [
        ('number_studies', r_client.hmset, number_studies),
        ('number_of_samples', r_client.hmset, number_of_samples),
        ('num_users', r_client.set, num_users),
        ('lat_longs', r_client.set, lat_longs),
        ('num_studies_ebi', r_client.set, num_studies_ebi),
        ('num_samples_ebi', r_client.set, num_samples_ebi),
        ('img', r_client.set, img)]
    for name, setter, value in to_store:
        setter('%s:stats:%s' % (portal, name), value)

    return missing_files
274+
275+
125276
def get_lat_longs():
126277
"""Retrieve the latitude and longitude of all the samples in the DB
127278

qiita_db/test/test_meta_util.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import pandas as pd
1212

13+
from moi import r_client
1314
from qiita_core.qiita_settings import qiita_config
1415
from qiita_core.util import qiita_test_checker
1516

@@ -180,6 +181,59 @@ def test_get_lat_longs_EMP_portal(self):
180181

181182
self.assertItemsEqual(obs, exp)
182183

184+
def test_update_redis_stats(self):
    """The stats stored in redis match the contents of the test database"""
    qdb.meta_util.update_redis_stats()

    # checking values from redis
    portal = qiita_config.portal
    # (stat name, redis getter, expected value): dicts are retrieved with
    # hgetall, single values and tuples with get
    checks = [
        ('number_studies', r_client.hgetall,
         {'sanbox': '2', 'public': '0', 'private': '1'}),
        ('number_of_samples', r_client.hgetall,
         {'sanbox': '1', 'public': '0', 'private': '27'}),
        ('num_users', r_client.get, '4'),
        ('lat_longs', r_client.get, EXP_LAT_LONG),
        ('num_studies_ebi', r_client.get, '3'),
        ('num_samples_ebi', r_client.get, '54')]

    for name, fetch, exp in checks:
        obs = fetch('%s:stats:%s' % (portal, name))
        self.assertEqual(obs, exp)

    # not testing image! it is only retrieved
    r_client.get('%s:stats:%s' % (portal, 'img'))
221+
222+
223+
EXP_LAT_LONG = (
224+
'[[0.291867635913, 68.5945325743], [68.0991287718, 34.8360987059],'
225+
' [10.6655599093, 70.784770579], [40.8623799474, 6.66444220187],'
226+
' [13.089194595, 92.5274472082], [84.0030227585, 66.8954849864],'
227+
' [12.7065957714, 84.9722975792], [78.3634273709, 74.423907894],'
228+
' [82.8302905615, 86.3615778099], [53.5050692395, 31.6056761814],'
229+
' [43.9614715197, 82.8516734159], [29.1499460692, 82.1270418227],'
230+
' [23.1218032799, 42.838497795], [12.6245524972, 96.0693176066],'
231+
' [38.2627021402, 3.48274264219], [74.0894932572, 65.3283470202],'
232+
' [35.2374368957, 68.5041623253], [4.59216095574, 63.5115213108],'
233+
' [95.2060749748, 27.3592668624], [68.51099627, 2.35063674718],'
234+
' [85.4121476399, 15.6526750776], [60.1102854322, 74.7123248382],'
235+
' [3.21190859967, 26.8138925876], [57.571893782, 32.5563076447],'
236+
' [44.9725384282, 66.1920014699], [42.42, 41.41]]')
183237

184238
if __name__ == '__main__':
185239
main()

qiita_db/test/test_util.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from unittest import TestCase, main
1010
from tempfile import mkstemp
11-
from os import close, remove
11+
from os import close, remove, mkdir
1212
from os.path import join, exists, basename
1313
from shutil import rmtree
1414
from datetime import datetime
@@ -365,6 +365,20 @@ def _common_purge_filpeaths_test(self):
365365
def test_purge_filepaths(self):
366366
self._common_purge_filpeaths_test()
367367

368+
def test_empty_trash_upload_folder(self):
    """A file placed in a study's upload trash folder gets removed"""
    # creating file to delete so we know it actually works
    study_id = '1'
    uploads_root = qdb.util.get_mountpoint("uploads")[0][1]
    trash_dir = join(uploads_root, study_id, 'trash')
    if not exists(trash_dir):
        mkdir(trash_dir)
    target = join(trash_dir, 'my_file_to_delete.txt')
    # create an empty file
    with open(target, 'w'):
        pass

    self.assertTrue(exists(target))
    qdb.util.empty_trash_upload_folder()
    self.assertFalse(exists(target))
381+
368382
def test_purge_filepaths_null_cols(self):
369383
# For more details about the source of the issue that motivates this
370384
# test: http://www.depesz.com/2008/08/13/nulls-vs-not-in/

qiita_db/util.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,17 @@ def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
714714
for fpid, fp, fp_type_, m, s in results]
715715

716716

717-
def purge_filepaths():
717+
def _rm_files(TRN, fp):
    """Schedule the removal of a path once the transaction is committed

    Parameters
    ----------
    TRN : object
        The transaction in which the removal is registered, through its
        ``add_post_commit_func`` method
    fp : str
        The path to remove. Nothing is scheduled if it does not exist
    """
    if not exists(fp):
        return

    # directories need rmtree, anything else is removed as a file
    remover = rmtree if isdir(fp) else remove
    TRN.add_post_commit_func(remover, fp)
725+
726+
727+
def purge_filepaths(delete_files=True):
718728
r"""Goes over the filepath table and remove all the filepaths that are not
719729
used in any place
720730
"""
@@ -751,18 +761,39 @@ def purge_filepaths():
751761
sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s"
752762
db_results = qdb.sql_connection.TRN.execute_fetchindex()
753763
for fp_id, fp, fp_type, dd_id in db_results:
754-
qdb.sql_connection.TRN.add(sql, [fp_id])
764+
if delete_files:
765+
qdb.sql_connection.TRN.add(sql, [fp_id])
766+
fp = join(get_mountpoint_path_by_id(dd_id), fp)
767+
_rm_files(qdb.sql_connection.TRN, fp)
768+
else:
769+
print fp, fp_type
755770

756-
# Remove the data
757-
fp = join(get_mountpoint_path_by_id(dd_id), fp)
758-
if exists(fp):
759-
if fp_type is 'directory':
760-
func = rmtree
761-
else:
762-
func = remove
763-
qdb.sql_connection.TRN.add_post_commit_func(func, fp)
771+
if delete_files:
772+
qdb.sql_connection.TRN.execute()
764773

765-
qdb.sql_connection.TRN.execute()
774+
775+
def empty_trash_upload_folder(delete_files=True):
776+
r"""Delete all files in the trash folder inside each of the upload
777+
folders"""
778+
gfp = partial(join, get_db_files_base_dir())
779+
with qdb.sql_connection.TRN:
780+
sql = """SELECT mountpoint
781+
FROM qiita.data_directory
782+
WHERE data_type = 'uploads'"""
783+
qdb.sql_connection.TRN.add(sql)
784+
785+
for mp in qdb.sql_connection.TRN.execute_fetchflatten():
786+
for path, dirs, files in walk(gfp(mp)):
787+
if path.endswith('/trash'):
788+
if delete_files:
789+
for f in files:
790+
fp = join(path, f)
791+
_rm_files(qdb.sql_connection.TRN, fp)
792+
else:
793+
print files
794+
795+
if delete_files:
796+
qdb.sql_connection.TRN.execute()
766797

767798

768799
def move_filepaths_to_upload_folder(study_id, filepaths):

scripts/qiita-cron-job

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#!/usr/bin/env python
2+
3+
# -----------------------------------------------------------------------------
4+
# Copyright (c) 2014--, The Qiita Development Team.
5+
#
6+
# Distributed under the terms of the BSD 3-clause License.
7+
#
8+
# The full license is in the file LICENSE, distributed with this software.
9+
# -----------------------------------------------------------------------------
10+
11+
from qiita_db.util import purge_filepaths, empty_trash_upload_folder
12+
from qiita_db.meta_util import update_redis_stats
13+
14+
15+
# This script will perform these jobs:
16+
# 1. purge_filepaths: remove files that are leftover in the
17+
# qiita.filepath table and are present in the filesystem
18+
# 2. empty_trash_upload_folder: remove files that are present in the trash
19+
# of the upload folders
20+
# 3. update_redis_stats: updates the redis stats information
21+
#
22+
# Note that it is the responsibility of the Qiita install system admin to add
23+
# this script to a cron job and to define how often it should run
24+
25+
26+
def call_purge_filepaths():
    """Run purge_filepaths with file deletion enabled

    This is the job that removes the files that are leftover in
    qiita.filepath and are present in the filesystem
    """
    purge_filepaths(delete_files=True)
30+
31+
32+
def call_empty_trash_upload_folder():
    """remove files that are present in the trash of the upload folders"""
    empty_trash_upload_folder(True)
35+
36+
37+
def call_update_redis_stats():
    """Recompute the system stats and store them in redis

    The value returned by update_redis_stats is not used here
    """
    update_redis_stats()
40+
41+
42+
def main():
    """Run every maintenance job of this script, one after the other"""
    jobs = (call_purge_filepaths,
            call_empty_trash_upload_folder,
            call_update_redis_stats)
    for job in jobs:
        job()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)