Analysis refactor gui Part 2 #2068

Closed
91 commits
3601c29
fix #1505
antgonza Jan 2, 2017
0d6788e
improving some GUI stuff
antgonza Jan 3, 2017
12406cc
improving some GUI stuff - missing lines
antgonza Jan 3, 2017
958fcbe
pull upstream master
antgonza Jan 4, 2017
a57ef23
addressing all comments
antgonza Jan 5, 2017
2ead7a6
ready for review
antgonza Jan 5, 2017
931e566
New DB structure
josenavas Jan 9, 2017
a2b883e
Adding python patch
josenavas Jan 9, 2017
e737f64
Adding a biom so we can actually execute the patch
josenavas Jan 9, 2017
6bc0303
Fixing the patch to correctly transfer the information from the old s…
josenavas Jan 12, 2017
331012d
Fixing patch
josenavas Jan 12, 2017
eabe25d
Fixing patch and a few other bits to make the patch run successfully
josenavas Jan 12, 2017
0c40bc0
These files are no longer needed
josenavas Jan 12, 2017
602acc3
Removing unused code
josenavas Jan 12, 2017
ad4deea
Dropping analysis status table
josenavas Jan 12, 2017
aa3e96f
Linking the analysis with all the artifacts
josenavas Jan 12, 2017
d222e08
Fixing typo
josenavas Jan 12, 2017
14be3b0
Merge branch 'analysis-refactor-db' into analysis-refactor-fix-analysis
josenavas Jan 12, 2017
39f6beb
Fixing HTML and dbschema files
josenavas Jan 12, 2017
8c2be56
Merge branch 'analysis-refactor-db' into analysis-refactor-fix-analysis
josenavas Jan 12, 2017
4dd357c
Adding analysis jobs
josenavas Jan 13, 2017
209e838
Merge branch 'analysis-refactor-db' into analysis-refactor-fix-analysis
josenavas Jan 13, 2017
b24833d
Extending the artifact to work with the analysis
josenavas Jan 13, 2017
8b7f222
Allowing multiomics datatype
josenavas Jan 13, 2017
b381993
Adding private_job_submitter and modifying proc job handler to use it
josenavas Jan 13, 2017
ab65fa2
Adding logging column to the analysis
josenavas Jan 13, 2017
40656f2
Merge branch 'analysis-refactor-db' into analysis-refactor-fix-analysis
josenavas Jan 13, 2017
c4c8420
Adding datatype to the analysis-processing job table
josenavas Jan 15, 2017
0cd9f27
Adding REST endpoint to access the analysis metadata
josenavas Jan 15, 2017
8207513
Adding private jobs to plugin
josenavas Jan 15, 2017
f307cd4
Fixing typo
josenavas Jan 15, 2017
78c8095
Fixing analysis
josenavas Jan 16, 2017
b3936c8
Fixing the processing jobs complete
josenavas Jan 16, 2017
19c3f7d
Removing the old job code
josenavas Jan 16, 2017
055b53e
Oops removed the wrong file
josenavas Jan 16, 2017
46c322d
Removing QiitaStatusObject because it is not used
josenavas Jan 16, 2017
4d2e14d
fixing metautil
josenavas Jan 16, 2017
befb4e2
Fixing portal, setup and sql tests
josenavas Jan 16, 2017
09f2722
Fixing user and util
josenavas Jan 16, 2017
d1cdc8e
Fixing qiita_db
josenavas Jan 16, 2017
73a78e7
fix #1987
antgonza Jan 16, 2017
e64a22a
Merge pull request #2036 from antgonza/fix-1505
josenavas Jan 16, 2017
099b575
Merge branch 'master' of https://github.com/biocore/qiita into analys…
josenavas Jan 16, 2017
681e1cf
Updating to master
josenavas Jan 16, 2017
43eb1ed
Addressing @antgonza's comments
josenavas Jan 16, 2017
0807bf0
Removing unused code from qiita_ware
josenavas Jan 16, 2017
05cf8f2
Modify analysis list
josenavas Jan 16, 2017
8ddaa89
xfer analysis list and deletion
josenavas Jan 16, 2017
b879bc8
Adding Analysis summary ajax handler
josenavas Jan 17, 2017
736c2b7
Adding selected samples handler
josenavas Jan 17, 2017
0dcae8b
Merge pull request #2047 from antgonza/fix-1987
josenavas Jan 17, 2017
4a5bbbc
initial commit
antgonza Jan 18, 2017
f99975c
requested changes
antgonza Jan 18, 2017
ed899a8
Merge pull request #2049 from antgonza/add-processing-suggestions
josenavas Jan 18, 2017
d508320
fix filter job list
antgonza Jan 18, 2017
025cc1e
Merge pull request #2050 from antgonza/fix-filter-job-list
josenavas Jan 18, 2017
599bcde
Fixing server cert (#2051)
josenavas Jan 19, 2017
d12ccfe
fix get_studies
antgonza Jan 20, 2017
b33983b
flake8
antgonza Jan 20, 2017
b4f1b1f
fix #503
antgonza Jan 20, 2017
62a1b93
fix #2010
antgonza Jan 20, 2017
2e36141
fix #1913
antgonza Jan 21, 2017
e006e20
fix errors
antgonza Jan 21, 2017
c174693
Merge pull request #2052 from antgonza/fix-get_studies
josenavas Jan 23, 2017
131dd6a
Merge pull request #2053 from antgonza/fix-by-blinking
josenavas Jan 23, 2017
cbdd602
Adding util
josenavas Jan 23, 2017
ccb55bd
addressing @josenavas comment
antgonza Jan 24, 2017
dfe2e83
flake8
antgonza Jan 24, 2017
15fcceb
Merge pull request #2056 from antgonza/fix-1913
josenavas Jan 24, 2017
7529c3d
Adding analysis listing and analysis description handlers
josenavas Jan 24, 2017
9eb9dbb
fix #1066 (#2058)
antgonza Jan 26, 2017
ecacdbc
Removing debug prints
josenavas Jan 26, 2017
1f1e826
fix #1961
antgonza Jan 27, 2017
19889f9
Automatic jobs & new stats (#2057)
antgonza Jan 27, 2017
4e380e0
Merge pull request #2060 from antgonza/fix-1961
wasade Jan 28, 2017
6246eea
Fixing creation url and job html creation
josenavas Jan 29, 2017
d016833
Taking into account non-phylogenetic metrics in beta diversity
josenavas Jan 30, 2017
8d1f0c2
Merge branch 'analysis-refactor-fix-analysis' of github.com:josenavas…
josenavas Jan 30, 2017
894379c
Fixing merge conflicts
josenavas Jan 30, 2017
a80c0ac
Addressing @antgonza's comments
josenavas Feb 1, 2017
229d5e2
Fix typo
josenavas Feb 1, 2017
be0292c
Make the interface reactive for when the initial artifacts are created
josenavas Feb 1, 2017
5d63afa
Merge branch 'analysis-refactor-fix-analysis' into analysis-refactor-…
josenavas Feb 1, 2017
c571bee
Merge branch 'analysis-refactor-remove-old' into analysis-refactor-qi…
josenavas Feb 1, 2017
005fc44
Merge branch 'analysis-refactor-qiita-db' into analysis-refactor-master
josenavas Feb 1, 2017
a4fceb7
Fixing merge conflicts
josenavas Feb 1, 2017
cf4e4b2
Merge branch 'analysis-refactor-master' into analysis-refactor-gui
josenavas Feb 1, 2017
191b7ce
Merge branch 'analysis-refactor-gui' into analysis-refactor-gui-2
josenavas Feb 1, 2017
64419c8
Using vue.js to update the interface automatically
josenavas Feb 1, 2017
2d1e1bb
Improving documentation
josenavas Feb 1, 2017
05e27a9
Not solving merge conflicts cause this is going to die
josenavas Feb 9, 2017
14 changes: 14 additions & 0 deletions qiita_db/analysis.py
@@ -212,8 +212,12 @@ def delete(cls, _id):
qdb.sql_connection.TRN.add(sql, [_id])
if qdb.sql_connection.TRN.execute_fetchlast():
raise qdb.exceptions.QiitaDBOperationNotPermittedError(
<<<<<<< HEAD
"Can't delete analysis %d, has artifacts attached")
=======
"Can't delete analysis %d, has artifacts attached"
% _id)
>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89

sql = "DELETE FROM qiita.analysis_filepath WHERE {0} = %s".format(
cls._analysis_id_column)
@@ -514,6 +518,10 @@ def pmid(self, pmid):
qdb.sql_connection.TRN.add(sql, [pmid, self._id])
qdb.sql_connection.TRN.execute()

<<<<<<< HEAD
# ---- Functions ----
=======
>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89
def add_artifact(self, artifact):
"""Adds an artifact to the analysis

@@ -776,9 +784,15 @@ def build_files(self, merge_duplicated_sample_ids):
self._build_mapping_file(samples, rename_dup_samples)
biom_files = self._build_biom_tables(
grouped_samples, rename_dup_samples)
<<<<<<< HEAD

return biom_files

=======

return biom_files

>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89
def _build_biom_tables(self, grouped_samples, rename_dup_samples=False):
"""Build tables and add them to the analysis"""
with qdb.sql_connection.TRN:
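
The conflict above in delete() is purely about message formatting: only the side below the ======= marker interpolates the analysis id, while the HEAD side would raise the message with a literal, unfilled %d. A minimal standalone sketch of the resolved behavior; the exception name comes from the diff, while delete_analysis and its has_artifacts flag (standing in for the SQL check via qdb.sql_connection.TRN.execute_fetchlast()) are hypothetical:

class QiitaDBOperationNotPermittedError(Exception):
    """Raised when an operation is not permitted on a DB object."""

def delete_analysis(analysis_id, has_artifacts):
    # has_artifacts stands in for the SQL check in the real method.
    if has_artifacts:
        # HEAD omits "% analysis_id", leaving a literal %d in the
        # message; the incoming side fills it in, as done here.
        raise QiitaDBOperationNotPermittedError(
            "Can't delete analysis %d, has artifacts attached" % analysis_id)

delete_analysis(42, has_artifacts=False)  # no artifacts: nothing raised
try:
    delete_analysis(42, has_artifacts=True)
except QiitaDBOperationNotPermittedError as e:
    print(e)  # Can't delete analysis 42, has artifacts attached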
23 changes: 23 additions & 0 deletions qiita_db/artifact.py
@@ -11,6 +11,7 @@
from itertools import chain
from datetime import datetime
from os import remove
from collections import Counter

import networkx as nx

@@ -284,10 +285,17 @@ def create(cls, filepaths, artifact_type, name=None, prep_template=None,
"at least one filepath is required.")

# Check that the combination of parameters is correct
<<<<<<< HEAD
counts = Counter([bool(parents or processing_parameters),
prep_template is not None,
bool(analysis or data_type)])
if counts[True] != 1:
=======
counts = (int(bool(parents or processing_parameters)) +
int(prep_template is not None) +
int(bool(analysis or data_type)))
if counts != 1:
>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89
# More than one parameter has been provided
raise qdb.exceptions.QiitaDBArtifactCreationError(
"One and only one of parents, prep template or analysis must "
@@ -363,12 +371,24 @@ def _associate_with_analysis(instance, analysis_id):
# If an artifact has parents, it can be either from the
# processing pipeline or the analysis pipeline. Decide which
# one here
<<<<<<< HEAD
studies = set()
analyses = set()
for p in parents:
s = p.study
a = p.analysis
if s is not None:
studies.add(s.id)
if a is not None:
analyses.add(a.id)
=======
studies = {p.study for p in parents}
analyses = {p.analysis for p in parents}
studies.discard(None)
analyses.discard(None)
studies = {s.id for s in studies}
analyses = {a.id for a in analyses}
>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89

# The first 2 cases should never happen, but it doesn't hurt
# to check them
@@ -429,7 +449,10 @@ def _associate_with_analysis(instance, analysis_id):
instance.visibility = 'private'
else:
instance.visibility = 'public'
<<<<<<< HEAD
=======

>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89
elif prep_template:
# This artifact is uploaded by the user in the
# processing pipeline
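
Both sides of the first conflict in create() enforce the same invariant: exactly one of (parents or processing_parameters), prep_template, and (analysis or data_type) may be supplied. A self-contained sketch showing that the Counter formulation (HEAD) and the integer-sum formulation (incoming) agree; the function name and arguments are illustrative, not qiita's API:

from collections import Counter

def exactly_one(parents=None, processing_parameters=None,
                prep_template=None, analysis=None, data_type=None):
    flags = [bool(parents or processing_parameters),
             prep_template is not None,
             bool(analysis or data_type)]
    counter_ok = Counter(flags)[True] == 1    # HEAD formulation
    sum_ok = sum(int(f) for f in flags) == 1  # incoming formulation
    assert counter_ok == sum_ok               # equivalent by construction
    return counter_ok

print(exactly_one(prep_template='PT1'))            # True: one source given
print(exactly_one(parents=['a1'], analysis='A2'))  # False: two sources given
print(exactly_one())                               # False: nothing given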
165 changes: 165 additions & 0 deletions qiita_db/meta_util.py
@@ -145,6 +145,171 @@ def validate_filepath_access_by_user(user, filepath_id):
return False


def update_redis_stats():
"""Generate the system stats and save them in redis

<<<<<<< HEAD
# Then add the filepaths of the sample template
study = artifact.study
if study:
filepath_ids.update(
{fid
for fid, _ in study.sample_template.get_filepaths()})

# Next, analyses
# Same as before, there are public, private, and shared
analyses = qdb.analysis.Analysis.get_by_status('public') | \
user.private_analyses | user.shared_analyses

if analyses:
sql = """SELECT filepath_id
FROM qiita.analysis_filepath
WHERE analysis_id IN %s"""
sql_args = tuple([a.id for a in analyses])
qdb.sql_connection.TRN.add(sql, [sql_args])
filepath_ids.update(qdb.sql_connection.TRN.execute_fetchflatten())

return filepath_ids
=======
Returns
-------
list of str
artifact filepaths that are not present in the file system
"""
STUDY = qdb.study.Study
studies = {'public': STUDY.get_by_status('public'),
'private': STUDY.get_by_status('private'),
'sandbox': STUDY.get_by_status('sandbox')}
number_studies = {k: len(v) for k, v in viewitems(studies)}

number_of_samples = {}
ebi_samples_prep = {}
num_samples_ebi = 0
for k, sts in viewitems(studies):
number_of_samples[k] = 0
for s in sts:
st = s.sample_template
if st is not None:
number_of_samples[k] += len(list(st.keys()))

ebi_samples_prep_count = 0
for pt in s.prep_templates():
ebi_samples_prep_count += len([
1 for _, v in viewitems(pt.ebi_experiment_accessions)
if v is not None and v != ''])
ebi_samples_prep[s.id] = ebi_samples_prep_count

if s.sample_template is not None:
num_samples_ebi += len([
1 for _, v in viewitems(
s.sample_template.ebi_sample_accessions)
if v is not None and v != ''])

num_users = qdb.util.get_count('qiita.qiita_user')

lat_longs = get_lat_longs()

num_studies_ebi = len(ebi_samples_prep)
number_samples_ebi_prep = sum([v for _, v in viewitems(ebi_samples_prep)])

# generating file size stats
stats = []
missing_files = []
for k, sts in viewitems(studies):
for s in sts:
for a in s.artifacts():
for _, fp, dt in a.filepaths:
try:
fp_stat = stat(fp)
stats.append((dt, fp_stat.st_size, strftime('%Y-%m',
localtime(fp_stat.st_ctime))))
except OSError:
missing_files.append(fp)

summary = {}
all_dates = []
for ft, size, ym in stats:
if ft not in summary:
summary[ft] = {}
if ym not in summary[ft]:
summary[ft][ym] = 0
all_dates.append(ym)
summary[ft][ym] += size
all_dates = sorted(set(all_dates))

# sorting summaries
rm_from_data = ['html_summary', 'tgz', 'directory', 'raw_fasta', 'log',
'biom', 'raw_sff', 'raw_qual']
ordered_summary = {}
for dt in summary:
if dt in rm_from_data:
continue
new_list = []
current_value = 0
for ad in all_dates:
if ad in summary[dt]:
current_value += summary[dt][ad]
new_list.append(current_value)
ordered_summary[dt] = new_list

plot_order = sorted([(k, ordered_summary[k][-1]) for k in ordered_summary],
key=lambda x: x[1])

# helper function to generate y axis, modified from:
# http://stackoverflow.com/a/1094933
def sizeof_fmt(value, position):
number = None
for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
if abs(value) < 1024.0:
number = "%3.1f%s" % (value, unit)
break
value /= 1024.0
if number is None:
number = "%.1f%s" % (value, 'Yi')
return number

all_dates_axis = range(len(all_dates))
plt.locator_params(axis='y', nbins=10)
plt.figure(figsize=(20, 10))
for k, v in plot_order:
plt.plot(all_dates_axis, ordered_summary[k], linewidth=2, label=k)

plt.xticks(all_dates_axis, all_dates)
plt.legend()
plt.grid()
ax = plt.gca()
ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(sizeof_fmt))
plt.xlabel('Date')
plt.ylabel('Storage space per data type')

plot = StringIO()
plt.savefig(plot, format='png')
plot.seek(0)
img = 'data:image/png;base64,' + quote(b64encode(plot.buf))

time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

portal = qiita_config.portal
vals = [
('number_studies', number_studies, r_client.hmset),
('number_of_samples', number_of_samples, r_client.hmset),
('num_users', num_users, r_client.set),
('lat_longs', lat_longs, r_client.set),
('num_studies_ebi', num_studies_ebi, r_client.set),
('num_samples_ebi', num_samples_ebi, r_client.set),
('number_samples_ebi_prep', number_samples_ebi_prep, r_client.set),
('img', img, r_client.set),
('time', time, r_client.set)]
for k, v, f in vals:
redis_key = '%s:stats:%s' % (portal, k)
# important to "flush" variables to avoid errors
r_client.delete(redis_key)
f(redis_key, v)

return missing_files
>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89


def update_redis_stats():
"""Generate the system stats and save them in redis

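
The sizeof_fmt helper in the incoming branch (adapted, per its comment, from http://stackoverflow.com/a/1094933) walks the binary unit prefixes so matplotlib's FuncFormatter can label the storage plot's y axis in human-readable sizes. A matplotlib-free check of that logic, with the body copied from the diff:

def sizeof_fmt(value, position):
    # position is unused here; mpl.ticker.FuncFormatter passes it in.
    number = None
    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
        if abs(value) < 1024.0:
            number = "%3.1f%s" % (value, unit)
            break
        value /= 1024.0
    if number is None:
        number = "%.1f%s" % (value, 'Yi')
    return number

print(sizeof_fmt(512, None))            # 512.0
print(sizeof_fmt(2048, None))           # 2.0K
print(sizeof_fmt(3 * 1024 ** 3, None))  # 3.0G

The per-portal redis keys written at the end of update_redis_stats follow the pattern <portal>:stats:<name>, and each key is deleted before being re-set so stale values are flushed.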
118 changes: 118 additions & 0 deletions qiita_db/support_files/patches/50.sql
@@ -1,3 +1,120 @@
<<<<<<< HEAD
-- Jan 5, 2017
-- Move the analysis to the plugin system. This is a major rewrite of the
-- database backend that supports the analysis pipeline.
-- After exploring the data in the database, we realized that
-- there are a lot of inconsistencies in the data. Unfortunately, this
-- makes the process of transferring the data from the old structure
-- to the new one a bit more challenging, as we will need to handle
-- different special cases. Furthermore, not all the information needed is
-- present in the database, since some of it requires checking BIOM files.
-- For these reasons, the vast majority of the data transfer is done in the
-- python patch 47.py

-- In this file we are just creating the new data structures. The old
-- data structure will be dropped in the python patch once all data has been
-- transferred.

-- Create the new data structures

-- Table that links the analysis with the initial set of artifacts
CREATE TABLE qiita.analysis_artifact (
analysis_id bigint NOT NULL,
artifact_id bigint NOT NULL,
CONSTRAINT idx_analysis_artifact_0 PRIMARY KEY (analysis_id, artifact_id)
);
CREATE INDEX idx_analysis_artifact_analysis ON qiita.analysis_artifact (analysis_id);
CREATE INDEX idx_analysis_artifact_artifact ON qiita.analysis_artifact (artifact_id);
ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_analysis FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id );
ALTER TABLE qiita.analysis_artifact ADD CONSTRAINT fk_analysis_artifact_artifact FOREIGN KEY ( artifact_id ) REFERENCES qiita.artifact( artifact_id );

-- Dropping the analysis status column because now it depends on the
-- artifacts' status, like the study does.
ALTER TABLE qiita.analysis DROP COLUMN analysis_status_id;

-- Create a table to link the analysis with the jobs that create the initial
-- artifacts
CREATE TABLE qiita.analysis_processing_job (
analysis_id bigint NOT NULL,
processing_job_id uuid NOT NULL,
CONSTRAINT idx_analysis_processing_job PRIMARY KEY ( analysis_id, processing_job_id )
) ;

CREATE INDEX idx_analysis_processing_job_analysis ON qiita.analysis_processing_job ( analysis_id ) ;
CREATE INDEX idx_analysis_processing_job_pj ON qiita.analysis_processing_job ( processing_job_id ) ;
ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job FOREIGN KEY ( analysis_id ) REFERENCES qiita.analysis( analysis_id ) ;
ALTER TABLE qiita.analysis_processing_job ADD CONSTRAINT fk_analysis_processing_job_pj FOREIGN KEY ( processing_job_id ) REFERENCES qiita.processing_job( processing_job_id ) ;

-- Add a logging column in the analysis
ALTER TABLE qiita.analysis ADD logging_id bigint ;
CREATE INDEX idx_analysis_0 ON qiita.analysis ( logging_id ) ;
ALTER TABLE qiita.analysis ADD CONSTRAINT fk_analysis_logging FOREIGN KEY ( logging_id ) REFERENCES qiita.logging( logging_id ) ;

-- We can handle some of the special cases here, so we simplify the work in the
-- python patch

-- Special case 1: there are jobs in the database that do not contain
-- any information about the options used to process those parameters.
-- However, these jobs do not have any results and all are marked either
-- as queued or error, although no error log has been saved. Since these
-- jobs are mainly useless, we are going to remove them from the system
DELETE FROM qiita.analysis_job
WHERE job_id IN (SELECT job_id FROM qiita.job WHERE options = '{}');
DELETE FROM qiita.job WHERE options = '{}';

-- Special case 2: there are a fair number of jobs (719 last time I
-- checked) that are not attached to any analysis. Not sure how this
-- can happen, but these orphan jobs can't be accessed from anywhere
-- in the interface. Remove them from the system. Note that we are
-- unlinking the files but we are not removing them from the filepath
-- table. We will do that on the patch 47.py using the
-- purge_filepaths function, as it will make sure that those files are
-- not used anywhere else
DELETE FROM qiita.job_results_filepath WHERE job_id IN (
SELECT job_id FROM qiita.job J WHERE NOT EXISTS (
SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id));
DELETE FROM qiita.job J WHERE NOT EXISTS (
SELECT * FROM qiita.analysis_job AJ WHERE J.job_id = AJ.job_id);

-- In the analysis pipeline, an artifact can have multiple datatypes
-- (e.g. procrustes). Allow this by creating a new data_type, "Multiomic"
INSERT INTO qiita.data_type (data_type) VALUES ('Multiomic');


-- The validate command from BIOM will have an extra parameter, analysis
-- Magic number -> 4 BIOM command_id -> known for sure since it was added in
-- patch 36.sql
INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required)
VALUES (4, 'analysis', 'analysis', FALSE);
-- The template command now becomes optional, since it can be added either to
-- an analysis or to a prep template. command_parameter_id known from patch
-- 36.sql
UPDATE qiita.command_parameter SET required = FALSE WHERE command_parameter_id = 34;

-- We are going to add a new special software type, and a new software.
-- This is going to be used internally by Qiita to submit the private jobs.
-- This is needed for the analysis.
INSERT INTO qiita.software_type (software_type, description)
VALUES ('private', 'Internal Qiita jobs');

DO $do$
DECLARE
qiita_sw_id bigint;
baf_cmd_id bigint;
BEGIN
INSERT INTO qiita.software (name, version, description, environment_script, start_script, software_type_id, active)
VALUES ('Qiita', 'alpha', 'Internal Qiita jobs', 'source activate qiita', 'qiita-private-2', 3, True)
RETURNING software_id INTO qiita_sw_id;

INSERT INTO qiita.software_command (software_id, name, description)
VALUES (qiita_sw_id, 'build_analysis_files', 'Builds the files needed for the analysis')
RETURNING command_id INTO baf_cmd_id;

INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value)
VALUES (baf_cmd_id, 'analysis', 'analysis', True, NULL),
(baf_cmd_id, 'merge_dup_sample_ids', 'bool', False, 'False');
END $do$
=======
-- Feb 3, 2017
-- adding study tagging system

@@ -17,3 +134,4 @@ CREATE TABLE qiita.per_study_tags (
study_id bigint NOT NULL,
CONSTRAINT pk_per_study_tags PRIMARY KEY ( study_tag_id, study_id )
) ;
>>>>>>> ee170a08ec44fceb6c20b278279b8ce4b3d10a89
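
Once 50.sql creates qiita.analysis_artifact, the companion python patch (47.py, per the comments above) still has to backfill it by linking each existing analysis to its initial artifacts. A hedged psycopg2 sketch of such a backfill; the DSN and the (analysis_id, artifact_id) pairs are placeholders, since the real patch derives them from the old analysis tables and the BIOM files on disk:

import psycopg2

conn = psycopg2.connect(dbname='qiita', user='qiita')  # placeholder DSN
with conn:
    with conn.cursor() as cur:
        # Hypothetical pairs; the real patch computes these from the
        # old qiita.analysis_job / qiita.job tables.
        pairs = [(1, 10), (1, 11), (2, 12)]
        cur.executemany(
            "INSERT INTO qiita.analysis_artifact (analysis_id, artifact_id) "
            "VALUES (%s, %s)", pairs)
conn.close()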