Skip to content

Commit 19889f9

Browse files
antgonza authored and ElDeveloper committed
Automatic jobs & new stats (#2057)
* fix #814, fix #1636 * fixing error in test-env * fixing stats.html call * adding img * addressing @josenavas comments * rm for loops * addressing @ElDeveloper comments
1 parent 9eb9dbb commit 19889f9

File tree

8 files changed

+379
-87
lines changed

8 files changed

+379
-87
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ script:
4040
- qiita-env start_cluster qiita-general
4141
- qiita-env make --no-load-ontologies
4242
- if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
43+
- if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
4344
- if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
4445
- if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi
45-
- flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install
46+
- flake8 qiita_* setup.py scripts/*
4647
- ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/
4748
- qiita pet webserver
4849
addons:

qiita_db/meta_util.py

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@
2525
# -----------------------------------------------------------------------------
2626
from __future__ import division
2727

28+
from moi import r_client
29+
from os import stat
30+
from time import strftime, localtime
31+
import matplotlib.pyplot as plt
32+
import matplotlib as mpl
33+
from base64 import b64encode
34+
from urllib import quote
35+
from StringIO import StringIO
36+
from future.utils import viewitems
37+
from datetime import datetime
38+
2839
from qiita_core.qiita_settings import qiita_config
2940
import qiita_db as qdb
3041

@@ -122,6 +133,147 @@ def get_accessible_filepath_ids(user):
122133
return filepath_ids
123134

124135

136+
def update_redis_stats():
    """Generate the system stats and save them in redis

    The following values are computed and stored in redis under the key
    ``<portal>:stats:<name>``: the number of studies and samples per study
    status, the number of users, the lat/longs of the samples, the EBI
    accession counts, a base64 encoded PNG with the cumulative storage used
    per filepath type and the time at which the stats were generated.

    Returns
    -------
    list of str
        artifact filepaths that are not present in the file system
    """
    STUDY = qdb.study.Study
    # NOTE: 'sanbox' is misspelled, but it is the field name stored in the
    # redis hashes, so it is kept as is to not break whoever reads them
    studies = {'public': STUDY.get_by_status('public'),
               'private': STUDY.get_by_status('private'),
               'sanbox': STUDY.get_by_status('sandbox')}
    number_studies = {k: len(v) for k, v in viewitems(studies)}

    number_of_samples = {}
    ebi_samples_prep = {}
    num_samples_ebi = 0
    for status, status_studies in viewitems(studies):
        number_of_samples[status] = 0
        for study in status_studies:
            st = study.sample_template
            if st is not None:
                number_of_samples[status] += len(list(st.keys()))

            # experiment accessions over all the preps of the study
            ebi_samples_prep_count = 0
            for pt in study.prep_templates():
                ebi_samples_prep_count += sum(
                    1 for _, v in viewitems(pt.ebi_experiment_accessions)
                    if v is not None and v != '')
            ebi_samples_prep[study.id] = ebi_samples_prep_count

            if st is not None:
                num_samples_ebi += sum(
                    1 for _, v in viewitems(st.ebi_sample_accessions)
                    if v is not None and v != '')

    num_users = qdb.util.get_count('qiita.qiita_user')

    lat_longs = get_lat_longs()

    # every study gets an entry in ebi_samples_prep, even when its count is 0
    num_studies_ebi = len(ebi_samples_prep)
    number_samples_ebi_prep = sum(ebi_samples_prep.values())

    # generating file size stats: (filepath type, size, 'YYYY-MM' of ctime)
    stats = []
    missing_files = []
    for status_studies in studies.values():
        for study in status_studies:
            for artifact in study.artifacts():
                for _, fp, fp_type in artifact.filepaths:
                    try:
                        fp_stat = stat(fp)
                    except OSError:
                        # the file is in the DB but not in the file system
                        missing_files.append(fp)
                        continue
                    stats.append((fp_type, fp_stat.st_size,
                                  strftime('%Y-%m',
                                           localtime(fp_stat.st_ctime))))

    # summary[filepath type][year-month] -> bytes created in that month
    summary = {}
    for fp_type, size, year_month in stats:
        by_month = summary.setdefault(fp_type, {})
        by_month[year_month] = by_month.get(year_month, 0) + size
    all_dates = sorted({year_month for _, _, year_month in stats})

    # sorting summaries: cumulative size per month for the types to plot
    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
                    'biom', 'raw_sff', 'raw_qual']
    ordered_summary = {}
    for fp_type, by_month in viewitems(summary):
        if fp_type in rm_from_data:
            continue
        running_total = 0
        cumulative = []
        for year_month in all_dates:
            running_total += by_month.get(year_month, 0)
            cumulative.append(running_total)
        ordered_summary[fp_type] = cumulative

    # plot the types from the smallest to the largest final size
    plot_order = sorted(ordered_summary,
                        key=lambda fp_type: ordered_summary[fp_type][-1])

    # helper function to generate y axis, modified from:
    # http://stackoverflow.com/a/1094933
    def sizeof_fmt(value, position):
        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
            if abs(value) < 1024.0:
                return "%3.1f%s" % (value, unit)
            value /= 1024.0
        return "%.1f%s" % (value, 'Yi')

    all_dates_axis = range(len(all_dates))
    # the figure has to exist before configuring its axes, otherwise the
    # locator parameters are applied to an implicit figure that is discarded
    fig = plt.figure(figsize=(20, 10))
    try:
        plt.locator_params(axis='y', nbins=10)
        for fp_type in plot_order:
            plt.plot(all_dates_axis, ordered_summary[fp_type], linewidth=2,
                     label=fp_type)

        plt.xticks(all_dates_axis, all_dates)
        plt.legend()
        plt.grid()
        ax = plt.gca()
        ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
        plt.xlabel('Date')
        plt.ylabel('Storage space per data type')

        plot = StringIO()
        plt.savefig(plot, format='png')
    finally:
        # pyplot keeps a reference to every figure until it is closed
        plt.close(fig)
    img = 'data:image/png;base64,' + quote(b64encode(plot.getvalue()))

    timestamp = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    portal = qiita_config.portal
    vals = [
        ('number_studies', number_studies, r_client.hmset),
        ('number_of_samples', number_of_samples, r_client.hmset),
        ('num_users', num_users, r_client.set),
        ('lat_longs', lat_longs, r_client.set),
        ('num_studies_ebi', num_studies_ebi, r_client.set),
        ('num_samples_ebi', num_samples_ebi, r_client.set),
        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
        ('img', img, r_client.set),
        ('time', timestamp, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:stats:%s' % (portal, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)

    return missing_files
275+
276+
125277
def get_lat_longs():
126278
"""Retrieve the latitude and longitude of all the samples in the DB
127279
@@ -146,7 +298,9 @@ def get_lat_longs():
146298
sql = [('SELECT CAST(latitude AS FLOAT), '
147299
' CAST(longitude AS FLOAT) '
148300
'FROM qiita.%s '
149-
'WHERE isnumeric(latitude) AND isnumeric(latitude)' % s)
301+
'WHERE isnumeric(latitude) AND isnumeric(longitude) '
302+
"AND latitude <> 'NaN' "
303+
"AND longitude <> 'NaN' " % s)
150304
for s in qdb.sql_connection.TRN.execute_fetchflatten()]
151305
sql = ' UNION '.join(sql)
152306
qdb.sql_connection.TRN.add(sql)

qiita_db/test/test_meta_util.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import pandas as pd
1212

13+
from moi import r_client
1314
from qiita_core.qiita_settings import qiita_config
1415
from qiita_core.util import qiita_test_checker
1516

@@ -180,6 +181,43 @@ def test_get_lat_longs_EMP_portal(self):
180181

181182
self.assertItemsEqual(obs, exp)
182183

184+
def test_update_redis_stats(self):
    """Check the values that update_redis_stats stores in redis"""
    qdb.meta_util.update_redis_stats()

    get_hash = r_client.hgetall
    get_value = r_client.get
    # (stat name, redis getter, expected value); img and time are not
    # tested for simplicity
    expected = [
        ('number_studies', get_hash,
         {'sanbox': '2', 'public': '0', 'private': '1'}),
        ('number_of_samples', get_hash,
         {'sanbox': '1', 'public': '0', 'private': '27'}),
        ('num_users', get_value, '4'),
        ('lat_longs', get_value, EXP_LAT_LONG),
        ('num_studies_ebi', get_value, '3'),
        ('num_samples_ebi', get_value, '27'),
        ('number_samples_ebi_prep', get_value, '54'),
    ]
    key_prefix = '%s:stats:' % qiita_config.portal
    for name, getter, exp in expected:
        self.assertEqual(getter(key_prefix + name), exp)
205+
206+
207+
EXP_LAT_LONG = (
208+
'[[0.291867635913, 68.5945325743], [68.0991287718, 34.8360987059],'
209+
' [10.6655599093, 70.784770579], [40.8623799474, 6.66444220187],'
210+
' [13.089194595, 92.5274472082], [84.0030227585, 66.8954849864],'
211+
' [12.7065957714, 84.9722975792], [78.3634273709, 74.423907894],'
212+
' [82.8302905615, 86.3615778099], [53.5050692395, 31.6056761814],'
213+
' [43.9614715197, 82.8516734159], [29.1499460692, 82.1270418227],'
214+
' [23.1218032799, 42.838497795], [12.6245524972, 96.0693176066],'
215+
' [38.2627021402, 3.48274264219], [74.0894932572, 65.3283470202],'
216+
' [35.2374368957, 68.5041623253], [4.59216095574, 63.5115213108],'
217+
' [95.2060749748, 27.3592668624], [68.51099627, 2.35063674718],'
218+
' [85.4121476399, 15.6526750776], [60.1102854322, 74.7123248382],'
219+
' [3.21190859967, 26.8138925876], [57.571893782, 32.5563076447],'
220+
' [44.9725384282, 66.1920014699], [42.42, 41.41]]')
183221

184222
if __name__ == '__main__':
185223
main()

qiita_db/test/test_util.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from unittest import TestCase, main
1010
from tempfile import mkstemp
11-
from os import close, remove
11+
from os import close, remove, mkdir
1212
from os.path import join, exists, basename
1313
from shutil import rmtree
1414
from datetime import datetime
@@ -365,6 +365,20 @@ def _common_purge_filpeaths_test(self):
365365
def test_purge_filepaths(self):
366366
self._common_purge_filpeaths_test()
367367

368+
def test_empty_trash_upload_folder(self):
    """A file placed in the trash of an upload folder gets removed"""
    # plant a file in the trash folder of study 1 so we know that the
    # deletion actually works
    uploads_dir = qdb.util.get_mountpoint("uploads")[0][1]
    trash_dir = join(uploads_dir, '1', 'trash')
    if not exists(trash_dir):
        mkdir(trash_dir)
    doomed_fp = join(trash_dir, 'my_file_to_delete.txt')
    with open(doomed_fp, 'w'):
        pass
    self.assertTrue(exists(doomed_fp))

    qdb.util.empty_trash_upload_folder()

    self.assertFalse(exists(doomed_fp))
381+
368382
def test_purge_filepaths_null_cols(self):
369383
# For more details about the source of the issue that motivates this
370384
# test: http://www.depesz.com/2008/08/13/nulls-vs-not-in/

qiita_db/util.py

Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -714,9 +714,24 @@ def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
714714
for fpid, fp, fp_type_, m, s in results]
715715

716716

717-
def purge_filepaths():
717+
def _rm_files(TRN, fp):
    """Register the removal of `fp` as a post-commit function of `TRN`

    Parameters
    ----------
    TRN : object
        The transaction; only its ``add_post_commit_func`` method is used
    fp : str
        The file or directory to remove. Nothing is registered if the path
        does not exist
    """
    if not exists(fp):
        return
    # directories need rmtree, anything else is removed as a regular file
    remover = rmtree if isdir(fp) else remove
    TRN.add_post_commit_func(remover, fp)
725+
726+
727+
def purge_filepaths(delete_files=True):
718728
r"""Goes over the filepath table and remove all the filepaths that are not
719729
used in any place
730+
731+
Parameters
732+
----------
733+
delete_files : bool
734+
if True it will actually delete the files, if False print
720735
"""
721736
with qdb.sql_connection.TRN:
722737
# Get all the (table, column) pairs that reference to the filepath
@@ -739,30 +754,58 @@ def purge_filepaths():
739754
union_str = " UNION ".join(
740755
["SELECT %s FROM qiita.%s WHERE %s IS NOT NULL" % (col, table, col)
741756
for table, col in qdb.sql_connection.TRN.execute_fetchindex()])
742-
# Get all the filepaths from the filepath table that are not
743-
# referenced from any place in the database
744-
sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
745-
FROM qiita.filepath FP JOIN qiita.filepath_type FPT
746-
ON FP.filepath_type_id = FPT.filepath_type_id
747-
WHERE filepath_id NOT IN (%s)""" % union_str
748-
qdb.sql_connection.TRN.add(sql)
757+
if union_str:
758+
# Get all the filepaths from the filepath table that are not
759+
# referenced from any place in the database
760+
sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
761+
FROM qiita.filepath FP JOIN qiita.filepath_type FPT
762+
ON FP.filepath_type_id = FPT.filepath_type_id
763+
WHERE filepath_id NOT IN (%s)""" % union_str
764+
qdb.sql_connection.TRN.add(sql)
749765

750766
# We can now go over and remove all the filepaths
751767
sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s"
752768
db_results = qdb.sql_connection.TRN.execute_fetchindex()
753769
for fp_id, fp, fp_type, dd_id in db_results:
754-
qdb.sql_connection.TRN.add(sql, [fp_id])
770+
if delete_files:
771+
qdb.sql_connection.TRN.add(sql, [fp_id])
772+
fp = join(get_mountpoint_path_by_id(dd_id), fp)
773+
_rm_files(qdb.sql_connection.TRN, fp)
774+
else:
775+
print fp, fp_type
755776

756-
# Remove the data
757-
fp = join(get_mountpoint_path_by_id(dd_id), fp)
758-
if exists(fp):
759-
if fp_type is 'directory':
760-
func = rmtree
761-
else:
762-
func = remove
763-
qdb.sql_connection.TRN.add_post_commit_func(func, fp)
777+
if delete_files:
778+
qdb.sql_connection.TRN.execute()
764779

765-
qdb.sql_connection.TRN.execute()
780+
781+
def empty_trash_upload_folder(delete_files=True):
    r"""Remove the files found in the trash folder of every upload folder

    Parameters
    ----------
    delete_files : bool
        if True the files are registered for deletion, if False the file
        names found in each trash folder are only printed
    """
    base_dir = get_db_files_base_dir()
    TRN = qdb.sql_connection.TRN
    with TRN:
        sql = """SELECT mountpoint
                 FROM qiita.data_directory
                 WHERE data_type = 'uploads'"""
        TRN.add(sql)

        for mountpoint in TRN.execute_fetchflatten():
            for path, _, files in walk(join(base_dir, mountpoint)):
                # only the folders named trash are of interest
                if not path.endswith('/trash'):
                    continue
                if not delete_files:
                    print(files)
                    continue
                for fname in files:
                    _rm_files(TRN, join(path, fname))

        if delete_files:
            TRN.execute()
766809

767810

768811
def move_filepaths_to_upload_folder(study_id, filepaths):

0 commit comments

Comments
 (0)