qiita-spots · ElDeveloper · Jan 27, 2017 · Jan 24, 2017 · Jan 24, 2017 · Jan 25, 2017
diff --git a/.travis.yml b/.travis.yml
@@ -40,9 +40,10 @@ script:
   - qiita-env start_cluster qiita-general
   - qiita-env make --no-load-ontologies
   - if [ ${TEST_ADD_STUDIES} == "True" ]; then test_data_studies/commands.sh ; fi
+  - if [ ${TEST_ADD_STUDIES} == "True" ]; then qiita-cron-job ; fi
   - if [ ${TEST_ADD_STUDIES} == "False" ]; then qiita-test-install ; fi
   - if [ ${TEST_ADD_STUDIES} == "False" ]; then nosetests --with-doctest --with-coverage -v --cover-package=qiita_db,qiita_pet,qiita_core,qiita_ware; fi
-  - flake8 qiita_* setup.py scripts/qiita scripts/qiita-env scripts/qiita-test-install
+  - flake8 qiita_* setup.py scripts/*
   - ls -R /home/travis/miniconda3/envs/qiita/lib/python2.7/site-packages/qiita_pet/support_files/doc/
   - qiita pet webserver
 addons:

diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py
@@ -25,6 +25,17 @@
 # -----------------------------------------------------------------------------
 from __future__ import division
 
+from moi import r_client
+from os import stat
+from time import strftime, localtime
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+from base64 import b64encode
+from urllib import quote
+from StringIO import StringIO
+from future.utils import viewitems
+from datetime import datetime
+
 from qiita_core.qiita_settings import qiita_config
 import qiita_db as qdb
 
@@ -122,6 +133,147 @@ def get_accessible_filepath_ids(user):
         return filepath_ids
 
 
+def update_redis_stats():
+    """Generate the system stats and save them in redis
+
+    Each stat is stored under the '<portal>:stats:<name>' redis key
+
+    Returns
+    -------
+    list of str
+        artifact filepaths that are not present in the file system
+    """
+    STUDY = qdb.study.Study
+    # NOTE: 'sanbox' (sic) is the stored key name; the tests read it as is
+    studies = {'public': STUDY.get_by_status('public'),
+               'private': STUDY.get_by_status('private'),
+               'sanbox': STUDY.get_by_status('sandbox')}
+    number_studies = {k: len(v) for k, v in viewitems(studies)}
+
+    number_of_samples = {}
+    ebi_samples_prep = {}
+    num_samples_ebi = 0
+    for k, sts in viewitems(studies):
+        number_of_samples[k] = 0
+        for s in sts:
+            st = s.sample_template
+            if st is not None:
+                number_of_samples[k] += len(list(st.keys()))
+                # samples of this study that have a non-empty EBI accession
+                num_samples_ebi += len([
+                    1 for _, v in viewitems(st.ebi_sample_accessions)
+                    if v is not None and v != ''])
+
+            ebi_samples_prep_count = 0
+            for pt in s.prep_templates():
+                ebi_samples_prep_count += len([
+                    1 for _, v in viewitems(pt.ebi_experiment_accessions)
+                    if v is not None and v != ''])
+            ebi_samples_prep[s.id] = ebi_samples_prep_count
+
+    num_users = qdb.util.get_count('qiita.qiita_user')
+    lat_longs = get_lat_longs()
+
+    # NOTE(review): every study gets an entry in ebi_samples_prep, so this is
+    # the total number of studies, not only the ones submitted to EBI
+    num_studies_ebi = len(ebi_samples_prep)
+    number_samples_ebi_prep = sum([v for _, v in viewitems(ebi_samples_prep)])
+
+    # generating file size stats
+    stats = []
+    missing_files = []
+    for k, sts in viewitems(studies):
+        for s in sts:
+            for a in s.artifacts():
+                for _, fp, dt in a.filepaths:
+                    try:
+                        fp_stat = stat(fp)
+                    except OSError:
+                        # not on disk: report it back instead of failing
+                        missing_files.append(fp)
+                        continue
+                    stats.append((dt, fp_stat.st_size, strftime(
+                        '%Y-%m', localtime(fp_stat.st_ctime))))
+
+    # summary[data_type][year-month] -> total size of files with that ctime
+    summary = {}
+    for ft, size, ym in stats:
+        by_month = summary.setdefault(ft, {})
+        by_month[ym] = by_month.get(ym, 0) + size
+    all_dates = sorted({ym for _, _, ym in stats})
+
+    # sorting summaries
+    rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
+                    'biom', 'raw_sff', 'raw_qual']
+    ordered_summary = {}
+    for dt in summary:
+        if dt in rm_from_data:
+            continue
+        # cumulative size over time, one value per entry of all_dates
+        new_list = []
+        current_value = 0
+        for ad in all_dates:
+            if ad in summary[dt]:
+                current_value += summary[dt][ad]
+            new_list.append(current_value)
+        ordered_summary[dt] = new_list
+
+    plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary],
+                        key=lambda x: x[1])
+
+    # helper function to generate y axis, modified from:
+    # http://stackoverflow.com/a/1094933
+    def sizeof_fmt(value, position):
+        # `position` is unused but FuncFormatter calls with (value, position)
+        for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
+            if abs(value) < 1024.0:
+                return "%3.1f%s" % (value, unit)
+            value /= 1024.0
+        return "%.1f%s" % (value, 'Yi')
+
+    all_dates_axis = range(len(all_dates))
+    # create the figure first so the locator settings apply to its axes
+    fig = plt.figure(figsize=(20, 10))
+    plt.locator_params(axis='y', nbins=10)
+    for k, v in plot_order:
+        plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)
+
+    plt.xticks(all_dates_axis, all_dates)
+    plt.legend()
+    plt.grid()
+    ax = plt.gca()
+    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
+    plt.xlabel('Date')
+    plt.ylabel('Storage space per data type')
+
+    plot = StringIO()
+    plt.savefig(plot, format='png')
+    # release the figure so repeated runs do not accumulate open figures
+    plt.close(fig)
+    img = 'data:image/png;base64,' + quote(b64encode(plot.getvalue()))
+
+    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')
+
+    portal = qiita_config.portal
+    vals = [
+        ('number_studies', number_studies, r_client.hmset),
+        ('number_of_samples', number_of_samples, r_client.hmset),
+        ('num_users', num_users, r_client.set),
+        ('lat_longs', lat_longs, r_client.set),
+        ('num_studies_ebi', num_studies_ebi, r_client.set),
+        ('num_samples_ebi', num_samples_ebi, r_client.set),
+        ('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
+        ('img', img, r_client.set),
+        ('time', time, r_client.set)]
+    for k, v, f in vals:
+        redis_key = '%s:stats:%s' % (portal, k)
+        # important to "flush" variables to avoid errors
+        r_client.delete(redis_key)
+        f(redis_key, v)
+
+    return missing_files
+
+
 def get_lat_longs():
     """Retrieve the latitude and longitude of all the samples in the DB
 
@@ -146,7 +298,9 @@ def get_lat_longs():
         sql = [('SELECT CAST(latitude AS FLOAT), '
                 '       CAST(longitude AS FLOAT) '
                 'FROM qiita.%s '
-                'WHERE isnumeric(latitude) AND isnumeric(latitude)' % s)
+                'WHERE isnumeric(latitude) AND isnumeric(longitude) '
+                "AND latitude <> 'NaN' "
+                "AND longitude <> 'NaN' " % s)
                for s in qdb.sql_connection.TRN.execute_fetchflatten()]
         sql = ' UNION '.join(sql)
         qdb.sql_connection.TRN.add(sql)

diff --git a/qiita_db/test/test_meta_util.py b/qiita_db/test/test_meta_util.py
@@ -10,6 +10,7 @@
 
 import pandas as pd
 
+from moi import r_client
 from qiita_core.qiita_settings import qiita_config
 from qiita_core.util import qiita_test_checker
 
@@ -180,6 +181,43 @@ def test_get_lat_longs_EMP_portal(self):
 
         self.assertItemsEqual(obs, exp)
 
+    def test_update_redis_stats(self):
+        """Check the values that update_redis_stats stores in redis"""
+        qdb.meta_util.update_redis_stats()
+
+        # (stat name, expected value, redis reader); img and time are not
+        # tested for simplicity
+        hgetall, get = r_client.hgetall, r_client.get
+        expected = [
+            ('number_studies',
+             {'sanbox': '2', 'public': '0', 'private': '1'}, hgetall),
+            ('number_of_samples',
+             {'sanbox': '1', 'public': '0', 'private': '27'}, hgetall),
+            ('num_users', '4', get),
+            ('lat_longs', EXP_LAT_LONG, get),
+            ('num_studies_ebi', '3', get),
+            ('num_samples_ebi', '27', get),
+            ('number_samples_ebi_prep', '54', get)]
+
+        prefix = '%s:stats:' % qiita_config.portal
+        for name, exp, reader in expected:
+            self.assertEqual(reader(prefix + name), exp)
+
+
+EXP_LAT_LONG = (  # value expected back from redis for the lat_longs stat
+    '[[0.291867635913, 68.5945325743], [68.0991287718, 34.8360987059],'
+    ' [10.6655599093, 70.784770579], [40.8623799474, 6.66444220187],'
+    ' [13.089194595, 92.5274472082], [84.0030227585, 66.8954849864],'
+    ' [12.7065957714, 84.9722975792], [78.3634273709, 74.423907894],'
+    ' [82.8302905615, 86.3615778099], [53.5050692395, 31.6056761814],'
+    ' [43.9614715197, 82.8516734159], [29.1499460692, 82.1270418227],'
+    ' [23.1218032799, 42.838497795], [12.6245524972, 96.0693176066],'
+    ' [38.2627021402, 3.48274264219], [74.0894932572, 65.3283470202],'
+    ' [35.2374368957, 68.5041623253], [4.59216095574, 63.5115213108],'
+    ' [95.2060749748, 27.3592668624], [68.51099627, 2.35063674718],'
+    ' [85.4121476399, 15.6526750776], [60.1102854322, 74.7123248382],'
+    ' [3.21190859967, 26.8138925876], [57.571893782, 32.5563076447],'
+    ' [44.9725384282, 66.1920014699], [42.42, 41.41]]')
 
 if __name__ == '__main__':
     main()
diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
@@ -8,7 +8,7 @@
 
 from unittest import TestCase, main
 from tempfile import mkstemp
-from os import close, remove
+from os import close, remove, mkdir
 from os.path import join, exists, basename
 from shutil import rmtree
 from datetime import datetime
@@ -365,6 +365,20 @@ def _common_purge_filpeaths_test(self):
     def test_purge_filepaths(self):
         self._common_purge_filpeaths_test()
 
+    def test_empty_trash_upload_folder(self):
+        # plant a file in the trash of study 1 so there is something to delete
+        mountpoint = qdb.util.get_mountpoint("uploads")[0][1]
+        trash_dir = join(mountpoint, '1', 'trash')
+        if not exists(trash_dir):
+            mkdir(trash_dir)
+        doomed = join(trash_dir, 'my_file_to_delete.txt')
+        with open(doomed, 'w'):
+            pass
+
+        self.assertTrue(exists(doomed))
+        qdb.util.empty_trash_upload_folder()
+        self.assertFalse(exists(doomed))
+
     def test_purge_filepaths_null_cols(self):
         # For more details about the source of the issue that motivates this
         # test: http://www.depesz.com/2008/08/13/nulls-vs-not-in/

diff --git a/qiita_db/util.py b/qiita_db/util.py
@@ -714,9 +714,24 @@ def path_builder(db_dir, filepath, mountpoint, subdirectory, obj_id):
                 for fpid, fp, fp_type_, m, s in results]
 
 
-def purge_filepaths():
+def _rm_files(TRN, fp):
+    """Register the removal of `fp` as a post-commit function of `TRN`
+
+    Directories are removed with rmtree, anything else with remove; nothing
+    is registered if `fp` does not exist
+    """
+    if exists(fp):
+        TRN.add_post_commit_func(rmtree if isdir(fp) else remove, fp)
+
+
+def purge_filepaths(delete_files=True):
     r"""Goes over the filepath table and remove all the filepaths that are not
     used in any place
+
+    Parameters
+    ----------
+    delete_files : bool
+        if True it will actually delete the files, if False print
     """
     with qdb.sql_connection.TRN:
         # Get all the (table, column) pairs that reference to the filepath
@@ -739,30 +754,58 @@ def purge_filepaths():
         union_str = " UNION ".join(
             ["SELECT %s FROM qiita.%s WHERE %s IS NOT NULL" % (col, table, col)
              for table, col in qdb.sql_connection.TRN.execute_fetchindex()])
-        # Get all the filepaths from the filepath table that are not
-        # referenced from any place in the database
-        sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
-            FROM qiita.filepath FP JOIN qiita.filepath_type FPT
-                ON FP.filepath_type_id = FPT.filepath_type_id
-            WHERE filepath_id NOT IN (%s)""" % union_str
-        qdb.sql_connection.TRN.add(sql)
+        if union_str:
+            # Get all the filepaths from the filepath table that are not
+            # referenced from any place in the database
+            sql = """SELECT filepath_id, filepath, filepath_type, data_directory_id
+                FROM qiita.filepath FP JOIN qiita.filepath_type FPT
+                    ON FP.filepath_type_id = FPT.filepath_type_id
+                WHERE filepath_id NOT IN (%s)""" % union_str
+            qdb.sql_connection.TRN.add(sql)
 
         # We can now go over and remove all the filepaths
         sql = "DELETE FROM qiita.filepath WHERE filepath_id=%s"
         db_results = qdb.sql_connection.TRN.execute_fetchindex()
         for fp_id, fp, fp_type, dd_id in db_results:
-            qdb.sql_connection.TRN.add(sql, [fp_id])
+            if delete_files:
+                qdb.sql_connection.TRN.add(sql, [fp_id])
+                fp = join(get_mountpoint_path_by_id(dd_id), fp)
+                _rm_files(qdb.sql_connection.TRN, fp)
+            else:
+                print fp, fp_type
 
-            # Remove the data
-            fp = join(get_mountpoint_path_by_id(dd_id), fp)
-            if exists(fp):
-                if fp_type is 'directory':
-                    func = rmtree
-                else:
-                    func = remove
-                qdb.sql_connection.TRN.add_post_commit_func(func, fp)
+        if delete_files:
+            qdb.sql_connection.TRN.execute()
 
-        qdb.sql_connection.TRN.execute()
+
+def empty_trash_upload_folder(delete_files=True):
+    r"""Empty the trash folder found inside each of the upload folders
+
+    Parameters
+    ----------
+    delete_files : bool
+        if True the files are deleted, if False their names are only printed
+    """
+    TRN = qdb.sql_connection.TRN
+    base_dir = get_db_files_base_dir()
+    with TRN:
+        sql = """SELECT mountpoint
+                 FROM qiita.data_directory
+                 WHERE data_type = 'uploads'"""
+        TRN.add(sql)
+
+        for mountpoint in TRN.execute_fetchflatten():
+            for path, _, fnames in walk(join(base_dir, mountpoint)):
+                if not path.endswith('/trash'):
+                    continue
+                if not delete_files:
+                    print fnames
+                    continue
+                for fname in fnames:
+                    _rm_files(TRN, join(path, fname))
+
+        if delete_files:
+            TRN.execute()
 
 
 def move_filepaths_to_upload_folder(study_id, filepaths):