diff --git a/qiita_db/artifact.py b/qiita_db/artifact.py
index f9321f1f6..15aa20a7f 100644
--- a/qiita_db/artifact.py
+++ b/qiita_db/artifact.py
@@ -573,11 +573,12 @@ def delete(cls, artifact_id):
                      WHERE artifact_id = %s"""
             qdb.sql_connection.TRN.add(sql, [artifact_id])
 
-            # If the artifact doesn't have parents, we move the files to the
-            # uploads folder. We also need to nullify the column in the prep
-            # template table
-            if not instance.parents:
-                qdb.util.move_filepaths_to_upload_folder(study.id, filepaths)
+            # If the artifact doesn't have parents and has a study (study is
+            # None for analyses), we move the files to the uploads folder. We
+            # also need to nullify the column in the prep template table
+            if not instance.parents and study is not None:
+                qdb.util.move_filepaths_to_upload_folder(
+                    study.id, filepaths)
 
             sql = """UPDATE qiita.prep_template
                      SET artifact_id = NULL
diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py
index 502a31985..6ffcb4b61 100644
--- a/qiita_db/metadata_template/base_metadata_template.py
+++ b/qiita_db/metadata_template/base_metadata_template.py
@@ -1124,7 +1124,7 @@ def extend(self, md_template):
             self.validate(self.columns_restrictions)
             self.generate_files()
 
-    def update(self, md_template):
+    def _update(self, md_template):
         r"""Update values in the template
 
         Parameters
@@ -1143,22 +1143,19 @@ def update(self, md_template):
             passed md_template
         """
         with qdb.sql_connection.TRN:
-            # Clean and validate the metadata template given
-            new_map = self._clean_validate_template(
-                md_template, self.study_id, current_columns=self.categories())
             # Retrieving current metadata
             current_map = self.to_dataframe()
 
             # simple validations of sample ids and column names
-            samples_diff = set(new_map.index).difference(current_map.index)
+            samples_diff = set(md_template.index).difference(current_map.index)
             if samples_diff:
                 raise qdb.exceptions.QiitaDBError(
                     'The new template differs from what is stored '
                     'in database by these samples names: %s'
                     % ', '.join(samples_diff))
 
-            if not set(current_map.columns).issuperset(new_map.columns):
-                columns_diff = set(new_map.columns).difference(
+            if not set(current_map.columns).issuperset(md_template.columns):
+                columns_diff = set(md_template.columns).difference(
                     current_map.columns)
                 raise qdb.exceptions.QiitaDBError(
                     'Some of the columns in your template are not present in '
@@ -1168,15 +1165,16 @@ def update(self, md_template):
             # In order to speed up some computation, let's compare only the
             # common columns and rows. current_map.columns and
-            # current_map.index are supersets of new_map.columns and
-            # new_map.index, respectivelly, so this will not fail
-            current_map = current_map[new_map.columns].loc[new_map.index]
+            # current_map.index are supersets of md_template.columns and
+            # md_template.index, respectively, so this will not fail
+            current_map = current_map[
+                md_template.columns].loc[md_template.index]
 
             # Get the values that we need to change
             # diff_map is a DataFrame that hold boolean values. If a cell is
-            # True, means that the new_map is different from the current_map
-            # while False means that the cell has the same value
-            diff_map = current_map != new_map
+            # True, it means that the md_template is different from the
+            # current_map, while False means that the cell has the same value
+            diff_map = current_map != md_template
             # ne_stacked holds a MultiIndexed DataFrame in which the first
             # level of indexing is the sample_name and the second one is the
             # columns. We only have 1 column, which holds if that
@@ -1195,8 +1193,8 @@ def update(self, md_template):
             changed.index.names = ['sample_name', 'column']
             # the combination of np.where and boolean indexing produces
             # a numpy array with only the values that actually changed
-            # between the current_map and new_map
-            changed_to = new_map.values[np.where(diff_map)]
+            # between the current_map and md_template
+            changed_to = md_template.values[np.where(diff_map)]
 
             # to_update is a MultiIndexed DataFrame, in which the index 0 is
             # the samples and the index 1 is the columns, we define these
@@ -1235,12 +1233,57 @@ def update(self, md_template):
                 """.format(self._table_name(self._id), sql_eq_cols,
                            single_value, sql_cols)
                 for sample in samples_to_update:
-                    sample_vals = [new_map[col][sample] for col in cols_to_update]
+                    sample_vals = [md_template[col][sample]
+                                   for col in cols_to_update]
                     sample_vals.insert(0, sample)
                     qdb.sql_connection.TRN.add(sql, sample_vals)
 
                 qdb.sql_connection.TRN.execute()
 
+    def update(self, md_template):
+        r"""Update values in the template
+
+        Parameters
+        ----------
+        md_template : DataFrame
+            The metadata template file contents indexed by sample ids
+
+        Raises
+        ------
+        QiitaDBError
+            If md_template and db do not have the same sample ids
+            If md_template and db do not have the same column headers
+            If self.can_be_updated is not True
+        QiitaDBWarning
+            If there are no differences between the contents of the DB and the
+            passed md_template
+        """
+        with qdb.sql_connection.TRN:
+            # Clean and validate the metadata template given
+            new_map = self._clean_validate_template(
+                md_template, self.study_id, current_columns=self.categories())
+
+            self._update(new_map)
+            self.validate(self.columns_restrictions)
+            self.generate_files()
+
+    def extend_and_update(self, md_template):
+        """Performs the update and extend operations at once
+
+        Parameters
+        ----------
+        md_template : DataFrame
+            The metadata template contents indexed by sample ids
+
+        See Also
+        --------
+        update
+        extend
+        """
+        with qdb.sql_connection.TRN:
+            md_template = self._clean_validate_template(
+                md_template, self.study_id, current_columns=self.categories())
+            self._common_extend_steps(md_template)
+            self._update(md_template)
             self.validate(self.columns_restrictions)
             self.generate_files()
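The `update`/`_update` split leaves the changed-cell detection untouched: `_update` still compares the stored metadata against the incoming DataFrame and extracts only the cells that differ. Below is a minimal standalone sketch of that pandas/numpy pattern on toy frames (the sample IDs and column name are illustrative; the real method reads `current_map` from the database):

```python
# Sketch of the changed-cell detection used in _update, on toy data.
import numpy as np
import pandas as pd

current_map = pd.DataFrame({'str_column': ['old', 'same']},
                           index=['1.SKB7.640196', '1.SKB8.640193'])
new_map = pd.DataFrame({'str_column': ['NEW VAL', 'same']},
                       index=['1.SKB7.640196', '1.SKB8.640193'])

# Boolean mask: True where the new template differs from the stored one
diff_map = current_map != new_map
# Stack into a Series indexed by (sample_name, column)
ne_stacked = diff_map.stack()
changed = ne_stacked[ne_stacked]
changed.index.names = ['sample_name', 'column']
# np.where + boolean indexing yields only the values that actually changed
changed_to = new_map.values[np.where(diff_map)]

print(list(changed.index))  # [('1.SKB7.640196', 'str_column')]
print(changed_to)           # ['NEW VAL']
```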
diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
index 82d72a79a..f0d50faae 100644
--- a/qiita_db/metadata_template/test/test_prep_template.py
+++ b/qiita_db/metadata_template/test/test_prep_template.py
@@ -1280,8 +1280,7 @@ def test_extend_update(self):
         self.metadata['str_column']['SKB7.640196'] = 'NEW VAL'
 
         npt.assert_warns(
-            qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata)
-        pt.update(self.metadata)
+            qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata)
 
         sql = "SELECT * FROM qiita.prep_{0}".format(pt.id)
         obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)]
diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py
index b1ae10680..2cdc9a27a 100644
--- a/qiita_db/metadata_template/test/test_sample_template.py
+++ b/qiita_db/metadata_template/test/test_sample_template.py
@@ -1732,8 +1732,8 @@ def test_extend_update(self):
         md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'],
                                         index=md_ext.index)
 
-        npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext)
-        st.update(md_ext)
+        npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update,
+                         md_ext)
 
         exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id,
                           "%s.Sample3" % st.id, "%s.Sample4" % st.id}
         self.assertEqual(st._get_sample_ids(), exp_sample_ids)
diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py
index 40d7cc6cc..db7557d8f 100644
--- a/qiita_db/metadata_template/test/test_util.py
+++ b/qiita_db/metadata_template/test/test_util.py
@@ -149,7 +149,7 @@ def test_load_template_to_dataframe_lowercase(self):
     def test_load_template_to_dataframe_non_utf8(self):
         bad = EXP_SAMPLE_TEMPLATE.replace('Test Sample 2',
                                           'Test Sample\x962')
-        with self.assertRaises(qdb.exceptions.QiitaDBError):
+        with self.assertRaises(ValueError):
             qdb.metadata_template.util.load_template_to_dataframe(
                 StringIO(bad))
 
@@ -387,20 +387,20 @@ def test_get_pgsql_reserved_words(self):
 
 EXP_SAMPLE_TEMPLATE_SPACES_EMPTY_ROW = (
     "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
-    "has_physical_specimen\thost_subject_id\tint_column\tlatitude\tlongitude\t"
-    "physical_location\trequired_sample_info_status\tsample_type\t"
-    "str_column\n"
-    "2.Sample1 \t2014-05-29 12:24:51\tTest Sample 1\tTrue\tTrue\t"
+    "has_physical_specimen\thost_subject_id\tint_column\tlatitude\t"
+    "longitude\t physical_location\trequired_sample_info_status"
+    "\tsample_type\tstr_column\n"
+    " 2.Sample1 \t2014-05-29 12:24:51\tTest Sample 1\tTrue\tTrue\t"
     "NotIdentified\t1\t42.42\t41.41\tlocation1\treceived\ttype1\t"
     "Value for sample 1\n"
-    "2.Sample2 \t2014-05-29 12:24:51\t"
+    " 2.Sample2 \t2014-05-29 12:24:51\t"
    "Test Sample 2\tTrue\tTrue\tNotIdentified\t2\t4.2\t1.1\tlocation1\t"
     "received\ttype1\tValue for sample 2\n"
     "2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t"
     "True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t"
     "Value for sample 3\n"
     "\t\t\t\t\t\t\t\t\t\t\t\t\n"
-    "\t\t\t\t\t\t\t\t\t\t\t\t\n")
+    "\t\t\t\t\t\t\t\t\t\t \t\t\n")
 
 EXP_ST_SPACES_EMPTY_COLUMN = (
     "sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py
index ce2b520da..7228bb49f 100644
--- a/qiita_db/metadata_template/util.py
+++ b/qiita_db/metadata_template/util.py
@@ -7,9 +7,9 @@
 # -----------------------------------------------------------------------------
 
 from __future__ import division
-from collections import defaultdict
 from future.utils import PY3, viewitems
 from six import StringIO
+from string import printable
 
 import pandas as pd
 import numpy as np
@@ -103,7 +103,27 @@ def load_template_to_dataframe(fn, index='sample_name'):
     # Load in file lines
     holdfile = None
     with open_file(fn, mode='U') as f:
+        errors = {}
         holdfile = f.readlines()
+        # here we are checking for non printable chars AKA non UTF-8 chars
+        for row, line in enumerate(holdfile):
+            for col, block in enumerate(line.split('\t')):
+                tblock = ''.join([c for c in block if c in printable])
+                if len(block) != len(tblock):
+                    tblock = ''.join([c if c in printable else '🐾'
+                                      for c in block])
+                    if tblock not in errors:
+                        errors[tblock] = []
+                    errors[tblock].append('(%d, %d)' % (row, col))
+        if bool(errors):
+            raise ValueError(
+                "There are invalid (non UTF-8) characters in your information "
+                "file. The offending fields and their location (row, column) "
+                "are listed below, invalid characters are represented using "
+                "🐾: %s" % '; '.join(
+                    ['"%s" = %s' % (k, ', '.join(v))
+                     for k, v in viewitems(errors)]))
 
     if not holdfile:
         raise ValueError('Empty file passed!')
@@ -137,7 +157,7 @@ def load_template_to_dataframe(fn, index='sample_name'):
         # .strip will remove odd chars, newlines, tabs and multiple
         # spaces but we need to read a new line at the end of the
         # line(+'\n')
-        newcols = [d.strip(" \r\x0b\x0c\n") for d in cols]
+        newcols = [d.strip(" \r\n") for d in cols]
 
         holdfile[pos] = '\t'.join(newcols) + '\n'
 
@@ -149,34 +169,19 @@ def load_template_to_dataframe(fn, index='sample_name'):
     # comment:
     #   using the tab character as "comment" we remove rows that are
     #   constituted only by delimiters i. e. empty rows.
-    try:
-        template = pd.read_csv(
-            StringIO(''.join(holdfile)),
-            sep='\t',
-            dtype=str,
-            encoding='utf-8',
-            infer_datetime_format=False,
-            keep_default_na=False,
-            index_col=False,
-            comment='\t',
-            converters={index: lambda x: str(x).strip()})
-        # remove newlines and tabs from fields
-        template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
-                         regex=True, inplace=True)
-    except UnicodeDecodeError:
-        # Find row number and col number for utf-8 encoding errors
-        headers = holdfile[0].strip().split('\t')
-        errors = defaultdict(list)
-        for row, line in enumerate(holdfile, 1):
-            for col, cell in enumerate(line.split('\t')):
-                try:
-                    cell.encode('utf-8')
-                except UnicodeError:
-                    errors[headers[col]].append(row)
-        lines = ['%s: row(s) %s' % (header, ', '.join(map(str, rows)))
-                 for header, rows in viewitems(errors)]
-        raise qdb.exceptions.QiitaDBError(
-            'Non UTF-8 characters found in columns:\n' + '\n'.join(lines))
+    template = pd.read_csv(
+        StringIO(''.join(holdfile)),
+        sep='\t',
+        dtype=str,
+        encoding='utf-8',
+        infer_datetime_format=False,
+        keep_default_na=False,
+        index_col=False,
+        comment='\t',
+        converters={index: lambda x: str(x).strip()})
+    # remove newlines and tabs from fields
+    template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
+                     regex=True, inplace=True)
 
     initial_columns = set(template.columns)
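With this change `load_template_to_dataframe` rejects problem files up front: instead of waiting for `pd.read_csv` to fail with a `UnicodeDecodeError`, it scans every tab-separated field for non-printable characters and reports their (row, column) coordinates. A self-contained sketch of the same scan, with the file contents inlined for illustration:

```python
# Sketch of the up-front non-printable-character check; the real code
# reads holdfile from disk via open_file(fn, mode='U').readlines().
from string import printable

lines = ["sample_name\tdescription\n",
         "1.S1\tTest Sample\x962\n"]  # \x96 is a non-printable byte

errors = {}
for row, line in enumerate(lines):
    for col, block in enumerate(line.split('\t')):
        clean = ''.join([c for c in block if c in printable])
        if len(block) != len(clean):
            # re-render the field with a paw print marking each bad char
            marked = ''.join([c if c in printable else '🐾' for c in block])
            errors.setdefault(marked, []).append('(%d, %d)' % (row, col))
if errors:
    raise ValueError(
        "There are invalid (non UTF-8) characters in your information "
        "file. The offending fields and their location (row, column) are: "
        + '; '.join('"%s" = %s' % (k, ', '.join(v))
                    for k, v in errors.items()))
```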
diff --git a/qiita_db/private.py b/qiita_db/private.py
index 78a286f51..0f10187ca 100644
--- a/qiita_db/private.py
+++ b/qiita_db/private.py
@@ -47,8 +47,27 @@ def build_analysis_files(job):
             j.submit()
             sleep(1)
 
+    # The validator jobs no longer finish the job automatically so we need
+    # to release the validators here
+    job.release_validators()
 
-TASK_DICT = {'build_analysis_files': build_analysis_files}
+
+def release_validators(job):
+    """Waits until all the validators of a job are completed
+
+    Parameters
+    ----------
+    job : qiita_db.processing_job.ProcessingJob
+        The processing job with the information of the parent job
+    """
+    with qdb.sql_connection.TRN:
+        qdb.processing_job.ProcessingJob(
+            job.parameters.values['job']).release_validators()
+        job._set_status('success')
+
+
+TASK_DICT = {'build_analysis_files': build_analysis_files,
+             'release_validators': release_validators}
 
 
 def private_task(job_id):
diff --git a/qiita_db/processing_job.py b/qiita_db/processing_job.py
index fa4d93341..86453d4f5 100644
--- a/qiita_db/processing_job.py
+++ b/qiita_db/processing_job.py
@@ -14,6 +14,7 @@
 from itertools import chain
 from collections import defaultdict
 from json import dumps, loads
+from time import sleep
 
 from future.utils import viewitems, viewvalues
 import networkx as nx
@@ -420,8 +421,9 @@ def release_validators(self):
                     "Only artifact transformation and private jobs can "
                     "release validators")
 
-            # Check if all the validators are ready by checking that there is
-            # no validator processing job whose status is not waiting
+            # Check if all the validators are completed. Validator jobs can be
+            # in two states when completed: 'waiting' in case of success
+            # or 'error' otherwise
             sql = """SELECT COUNT(1)
                      FROM qiita.processing_job_validator pjv
                         JOIN qiita.processing_job pj ON
@@ -429,12 +431,49 @@ def release_validators(self):
                         JOIN qiita.processing_job_status USING
                             (processing_job_status_id)
                      WHERE pjv.processing_job_id = %s
-                        AND processing_job_status != %s"""
-            qdb.sql_connection.TRN.add(sql, [self.id, 'waiting'])
+                        AND processing_job_status NOT IN %s"""
+            sql_args = [self.id, ('waiting', 'error')]
+            qdb.sql_connection.TRN.add(sql, sql_args)
             remaining = qdb.sql_connection.TRN.execute_fetchlast()
 
-            if remaining == 0:
-                # All validators have completed
+            # Active polling - wait until all validator jobs are completed
+            while remaining != 0:
+                self.step = "Validating outputs (%d remaining)" % remaining
+                sleep(10)
+                qdb.sql_connection.TRN.add(sql, sql_args)
+                remaining = qdb.sql_connection.TRN.execute_fetchlast()
+
+            # Check if any of the validators errored
+            sql = """SELECT validator_id
+                     FROM qiita.processing_job_validator pjv
+                        JOIN qiita.processing_job pj
+                            ON pjv.validator_id = pj.processing_job_id
+                        JOIN qiita.processing_job_status USING
+                            (processing_job_status_id)
+                     WHERE pjv.processing_job_id = %s AND
+                        processing_job_status = %s"""
+            qdb.sql_connection.TRN.add(sql, [self.id, 'error'])
+            errored = qdb.sql_connection.TRN.execute_fetchflatten()
+
+            if errored:
+                # At least one of the validators failed. Set the rest of the
+                # validators and the current job as failed
+                qdb.sql_connection.TRN.add(sql, [self.id, 'waiting'])
+                waiting = qdb.sql_connection.TRN.execute_fetchflatten()
+
+                common_error = "\n".join(
+                    ["Validator %s error message: %s"
+                     % (j, ProcessingJob(j).log.msg) for j in errored])
+
+                val_error = "%d sister validator jobs failed: %s" % (
+                    len(errored), common_error)
+                for j in waiting:
+                    ProcessingJob(j)._set_error(val_error)
+
+                self._set_error('%d validator jobs failed: %s'
+                                % (len(errored), common_error))
+            else:
+                # All validators have successfully completed
                 sql = """SELECT validator_id
                          FROM qiita.processing_job_validator
                          WHERE processing_job_id = %s"""
@@ -460,8 +499,6 @@ def release_validators(self):
 
                 self._update_and_launch_children(mapping)
                 self._set_status('success')
-            else:
-                self.step = "Validating outputs (%d remaining)" % remaining
 
     def _complete_artifact_definition(self, artifact_data):
         """"Performs the needed steps to complete an artifact definition job
@@ -487,7 +524,6 @@ def _complete_artifact_definition(self, artifact_data):
             if job_params['provenance'] is not None:
                 # The artifact is a result from a previous job
                 provenance = loads(job_params['provenance'])
-                job = ProcessingJob(provenance['job'])
                 if provenance.get('data_type') is not None:
                     artifact_data = {'data_type': provenance['data_type'],
                                      'artifact_data': artifact_data}
@@ -500,7 +536,6 @@ def _complete_artifact_definition(self, artifact_data):
                 qdb.sql_connection.TRN.execute()
                 # Can't create the artifact until all validators are completed
                 self._set_status('waiting')
-                job.release_validators()
             else:
                 # The artifact is uploaded by the user or is the initial
                 # artifact of an analysis
@@ -619,6 +654,16 @@ def _complete_artifact_transformation(self, artifacts_data):
             for j in validator_jobs:
                 j.submit()
 
+            # Submit the job that will release all the validators
+            plugin = qdb.software.Software.from_name_and_version(
+                'Qiita', 'alpha')
+            cmd = plugin.get_command('release_validators')
+            params = qdb.software.Parameters.load(
+                cmd, values_dict={'job': self.id})
+            job = ProcessingJob.create(self.user, params)
+            # Doing the submission outside of the transaction
+            job.submit()
+
     def _set_validator_jobs(self, validator_jobs):
         """Sets the validator jobs for the current job
@@ -673,15 +718,6 @@ def complete(self, success, artifacts_data=None, error=None):
                 else:
                     self._set_status('success')
         else:
-            if self.command.software.type == 'artifact definition':
-                job_params = self.parameters.values
-                if job_params.get('provenance') is not None:
-                    # This artifact definition job is a result of a command
-                    # run, if it fails, set up the status of the "parent"
-                    # job also as failed, and assign the sem error message
-                    provenance = loads(job_params['provenance'])
-                    job = ProcessingJob(provenance['job'])
-                    job._set_error(error)
             self._set_error(error)
 
     @property
diff --git a/qiita_db/support_files/patches/57.sql b/qiita_db/support_files/patches/57.sql
new file mode 100644
index 000000000..ac5970659
--- /dev/null
+++ b/qiita_db/support_files/patches/57.sql
@@ -0,0 +1,19 @@
+-- Aug 8, 2017
+-- Add release validators internal Qiita command
+
+DO $do$
+DECLARE
+    qiita_sw_id     bigint;
+    rv_cmd_id       bigint;
+BEGIN
+    SELECT software_id INTO qiita_sw_id
+        FROM qiita.software
+        WHERE name = 'Qiita' AND version = 'alpha';
+
+    INSERT INTO qiita.software_command (software_id, name, description)
+        VALUES (qiita_sw_id, 'release_validators', 'Releases the job validators')
+        RETURNING command_id INTO rv_cmd_id;
+
+    INSERT INTO qiita.command_parameter (command_id, parameter_name, parameter_type, required, default_value)
+        VALUES (rv_cmd_id, 'job', 'string', True, NULL);
+END $do$;
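The net effect of the changes above: `_complete_artifact_transformation` now submits a dedicated `release_validators` job, and `release_validators` itself blocks, re-running the count query every 10 seconds until every validator reaches 'waiting' or 'error'. The following sketch shows that polling shape with the database count stubbed out (the function and parameter names here are illustrative, not Qiita's API):

```python
# Sketch of the active-polling loop used by release_validators; the real
# code re-runs the SQL count inside the open transaction on every pass.
from time import sleep

def wait_for_validators(count_incomplete, set_step, poll_seconds=10):
    """Block until no validator job is outside 'waiting'/'error'."""
    remaining = count_incomplete()
    while remaining != 0:
        set_step("Validating outputs (%d remaining)" % remaining)
        sleep(poll_seconds)
        remaining = count_incomplete()

# toy usage: pretend two validators finish on successive polls
pending = [2, 1, 0]
steps = []
wait_for_validators(lambda: pending.pop(0), steps.append, poll_seconds=0)
print(steps)  # ['Validating outputs (2 remaining)', 'Validating outputs (1 remaining)']
```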
diff --git a/qiita_db/test/test_artifact.py b/qiita_db/test/test_artifact.py
index 6441f6baf..ccd03d5be 100644
--- a/qiita_db/test/test_artifact.py
+++ b/qiita_db/test/test_artifact.py
@@ -721,6 +721,9 @@ def test_create_root_analysis(self):
         self.assertIsNone(obs.study)
         self.assertEqual(obs.analysis, qdb.analysis.Analysis(1))
 
+        # testing that it can be deleted
+        qdb.artifact.Artifact.delete(obs.id)
+
     def test_create_processed(self):
         exp_params = qdb.software.Parameters.from_default_params(
             qdb.software.DefaultParameters(1), {'input_data': 1})
@@ -1011,6 +1014,7 @@ def test_delete_as_output_job(self):
         job.complete(True, artifacts_data=data)
         job = qdb.processing_job.ProcessingJob(
             "bcc7ebcd-39c1-43e4-af2d-822e3589f14d")
+        job.release_validators()
         artifact = job.outputs['OTU table']
 
         self._clean_up_files.extend([afp for _, afp, _ in artifact.filepaths])
diff --git a/qiita_db/test/test_processing_job.py b/qiita_db/test/test_processing_job.py
index 0bfdbb540..84fb9ef4f 100644
--- a/qiita_db/test/test_processing_job.py
+++ b/qiita_db/test/test_processing_job.py
@@ -359,6 +359,12 @@ def test_complete_multiple_outputs(self):
         artifact_data_2 = {'filepaths': [(fp2, 'biom')],
                            'artifact_type': 'BIOM'}
         obs2._complete_artifact_definition(artifact_data_2)
+        self.assertEqual(obs1.status, 'waiting')
+        self.assertEqual(obs2.status, 'waiting')
+        self.assertEqual(job.status, 'running')
+
+        job.release_validators()
+
         self.assertEqual(obs1.status, 'success')
         self.assertEqual(obs2.status, 'success')
         self.assertEqual(job.status, 'success')
@@ -386,7 +392,8 @@ def test_complete_artifact_definition(self):
             qdb.user.User('test@foo.bar'), params)
         job._set_validator_jobs([obs])
         obs._complete_artifact_definition(artifact_data)
-        self.assertEqual(job.status, 'success')
+        self.assertEqual(obs.status, 'waiting')
+        self.assertEqual(job.status, 'running')
 
         # Upload case implicitly tested by "test_complete_type"
 
     def test_complete_artifact_transformation(self):
@@ -476,7 +483,9 @@ def test_complete_success(self):
 
         obsjobs = set(self._get_all_job_ids())
 
-        self.assertEqual(len(obsjobs), len(alljobs) + 1)
+        # The complete call above submits 2 new jobs: the validator job and
+        # the release validators job. Hence the +2
+        self.assertEqual(len(obsjobs), len(alljobs) + 2)
         self._wait_for_job(job)
 
     def test_complete_failure(self):
@@ -501,12 +510,17 @@ def test_complete_failure(self):
         )
         obs = qdb.processing_job.ProcessingJob.create(
             qdb.user.User('test@foo.bar'), params)
+        job._set_validator_jobs([obs])
         obs.complete(False, error="Validation failure")
         self.assertEqual(obs.status, 'error')
         self.assertEqual(obs.log.msg, 'Validation failure')
 
+        self.assertEqual(job.status, 'running')
+        job.release_validators()
         self.assertEqual(job.status, 'error')
-        self.assertEqual(job.log.msg, 'Validation failure')
+        self.assertEqual(
+            job.log.msg, '1 validator jobs failed: Validator %s '
+            'error message: Validation failure' % obs.id)
 
     def test_complete_error(self):
         with self.assertRaises(
@@ -628,6 +642,7 @@ def test_outputs(self):
         job._set_validator_jobs([obs])
         exp_artifact_count = qdb.util.get_count('qiita.artifact') + 1
         obs._complete_artifact_definition(artifact_data)
+        job.release_validators()
         self.assertEqual(job.status, 'success')
 
         obs = job.outputs
diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py
index ebeae96a1..b223bdee4 100644
--- a/qiita_db/test/test_util.py
+++ b/qiita_db/test/test_util.py
@@ -800,36 +800,37 @@ def test_generate_study_list(self):
             qdb.user.User('shared@foo.bar'), 'test_study_1', info=info)
 
         exp_info = [
-            {'status': 'private', 'metadata_complete': True,
-             'study_tags': None, 'publication_doi': [
-                '10.100/123456', '10.100/7891011'],
-             'study_title': ('Identification of the Microbiomes for '
-                             'Cannabis Soils'),
-             'publication_pid': ['123456', '7891011'],
+            {'status': 'private', 'study_title': (
+                'Identification of the Microbiomes for Cannabis Soils'),
+             'metadata_complete': True, 'publication_pid': [
+                '123456', '7891011'], 'artifact_biom_ids': [4, 5, 6, 7],
              'ebi_submission_status': 'submitted', 'study_id': 1,
-             'ebi_study_accession': 'EBI123456-BB',
+             'ebi_study_accession': 'EBI123456-BB', 'owner': 'Dude',
              'shared': [('shared@foo.bar', 'Shared')],
             'study_abstract': (
                 'This is a preliminary study to examine the microbiota '
-                'associated with the Cannabis plant. Soils samples from the '
-                'bulk soil, soil associated with the roots, and the '
-                'rhizosphere were extracted and the DNA sequenced. Roots from '
-                'three independent plants of different strains were examined. '
-                'These roots were obtained November 11, 2011 from plants that '
-                'had been harvested in the summer. Future studies will '
-                'attempt to analyze the soils and rhizospheres from the same '
-                'location at different time points in the plant lifecycle.'),
-             'pi': ('PI_dude@foo.bar', 'PIDude'),
-             'artifact_biom_ids': [4, 5, 6, 7],
+                'associated with the Cannabis plant. Soils samples from '
+                'the bulk soil, soil associated with the roots, and the '
+                'rhizosphere were extracted and the DNA sequenced. Roots '
+                'from three independent plants of different strains were '
+                'examined. These roots were obtained November 11, 2011 from '
+                'plants that had been harvested in the summer. Future studies '
+                'will attempt to analyze the soils and rhizospheres from the '
+                'same location at different time points in the plant '
+                'lifecycle.'), 'pi': ('PI_dude@foo.bar', 'PIDude'),
+             'publication_doi': ['10.100/123456', '10.100/7891011'],
+             'study_alias': 'Cannabis Soils', 'study_tags': None,
              'number_samples_collected': 27},
-            {'status': 'sandbox', 'metadata_complete': True,
-             'study_tags': None, 'publication_doi': [],
-             'study_title': 'test_study_1', 'publication_pid': [],
+            {'status': 'sandbox', 'study_title': 'test_study_1',
+             'metadata_complete': True, 'publication_pid': [],
+             'artifact_biom_ids': None,
              'ebi_submission_status': 'not submitted',
             'study_id': new_study.id, 'ebi_study_accession': None,
-             'shared': [], 'study_abstract': 'Some abstract goes here',
-             'pi': ('lab_dude@foo.bar', 'LabDude'),
-             'artifact_biom_ids': None, 'number_samples_collected': 0}]
+             'owner': 'Shared', 'shared': [],
+             'study_abstract': 'Some abstract goes here',
+             'pi': ('lab_dude@foo.bar', 'LabDude'), 'publication_doi': [],
+             'study_alias': 'TST', 'study_tags': None,
+             'number_samples_collected': 0}]
         obs_info = qdb.util.generate_study_list([1, 2, 3, 4], True)
         self.assertEqual(obs_info, exp_info)
@@ -851,20 +852,21 @@ def test_get_artifacts_information(self):
 
         exp = [
             {'files': ['1_study_1001_closed_reference_otu_table.biom'],
-             'target_subfragment': ['V4'], 'data_type': '18S',
+             'target_subfragment': ['V4'], 'artifact_id': 4,
+             'algorithm': ('Pick closed-reference OTUs, QIIMEv1.9.1 |'
+                           ' barcode_type 8, defaults'),
+             'data_type': '18S', 'prep_samples': 27,
              'parameters': {
                 'reference': 1, 'similarity': 0.97, 'sortmerna_e_value': 1,
                 'sortmerna_max_pos': 10000, 'input_data': 2, 'threads': 1,
-                'sortmerna_coverage': 0.97}, 'name': 'BIOM',
-             'algorithm': ('Pick closed-reference OTUs, QIIMEv1.9.1 '
-                           '| barcode_type 8, defaults'), 'artifact_id': 4},
-            {'files': [], 'target_subfragment': ['V4'], 'data_type': '16S',
-             'parameters': {}, 'name': 'BIOM', 'algorithm': '',
-             'artifact_id': 7},
+                'sortmerna_coverage': 0.97}, 'name': 'BIOM'},
+            {'files': [], 'target_subfragment': ['V4'], 'algorithm': '',
+             'artifact_id': 7, 'data_type': '16S', 'prep_samples': 27,
+             'parameters': {}, 'name': 'BIOM'},
             {'files': ['biom_table.biom'], 'target_subfragment': [],
-             'data_type': '18S', 'parameters': {}, 'name': 'noname',
-             'algorithm': '', 'artifact_id': 8}]
-        self.assertEqual(obs, exp)
+             'algorithm': '', 'artifact_id': 8, 'data_type': '18S',
+             'prep_samples': 0, 'parameters': {}, 'name': 'noname'}]
+        self.assertItemsEqual(obs, exp)
 
 
 if __name__ == '__main__':
diff --git a/qiita_db/util.py b/qiita_db/util.py
index ed5789b3c..4adc16945 100644
--- a/qiita_db/util.py
+++ b/qiita_db/util.py
@@ -1205,7 +1205,7 @@ def generate_study_list(study_ids, public_only=False):
     -----
     The main select might look scary but it's pretty simple:
     - We select the requiered fields from qiita.study and qiita.study_person
-        SELECT metadata_complete, study_abstract, study_id,
+        SELECT metadata_complete, study_abstract, study_id, study_alias,
             study_title, ebi_study_accession, ebi_submission_status,
             qiita.study_person.name AS pi_name,
             qiita.study_person.email AS pi_email,
@@ -1235,10 +1235,13 @@ def generate_study_list(study_ids, public_only=False):
     - all study tags
         (SELECT array_agg(study_tag) FROM qiita.per_study_tags
          WHERE study_id=qiita.study.study_id) AS study_tags
+    - study owner
+        (SELECT name FROM qiita.qiita_user
+         WHERE email=qiita.study.email) AS owner
     """
     with qdb.sql_connection.TRN:
         sql = """
-            SELECT metadata_complete, study_abstract, study_id,
+            SELECT metadata_complete, study_abstract, study_id, study_alias,
                 study_title, ebi_study_accession, ebi_submission_status,
                 qiita.study_person.name AS pi_name,
                 qiita.study_person.email AS pi_email,
@@ -1261,7 +1264,9 @@ def generate_study_list(study_ids, public_only=False):
                     LEFT JOIN qiita.qiita_user USING (email)
                     WHERE study_id=qiita.study.study_id) AS shared_with_email,
                 (SELECT array_agg(study_tag) FROM qiita.per_study_tags
-                    WHERE study_id=qiita.study.study_id) AS study_tags
+                    WHERE study_id=qiita.study.study_id) AS study_tags,
+                (SELECT name FROM qiita.qiita_user
+                    WHERE email=qiita.study.email) AS owner
             FROM qiita.study
             LEFT JOIN qiita.study_person ON (
                 study_person_id=principal_investigator_id)
@@ -1303,6 +1308,8 @@ def generate_study_list(study_ids, public_only=False):
             del info["shared_with_email"]
 
             infolist.append({
+                'owner': info['owner'],
+                'study_alias': info['study_alias'],
                 'metadata_complete': info['metadata_complete'],
                 'publication_pid': info['publication_pid'],
                 'ebi_submission_status': info['ebi_submission_status'],
@@ -1375,7 +1382,7 @@ def get_artifacts_information(artifact_ids, only_biom=True):
                                 WHERE table_name = 'prep_' || CAST(
                                     prep_template_id AS TEXT)
                                     AND column_name='target_subfragment')
-                        THEN prep_template_id ELSE NULL END
+                        THEN prep_template_id ELSE NULL END, prep_template_id
                     FROM main_query
                     LEFT JOIN qiita.prep_template pt ON (
                         main_query.root_id = pt.artifact_id)
@@ -1402,10 +1409,13 @@ def get_artifacts_information(artifact_ids, only_biom=True):
                    qdb.sql_connection.TRN.execute_fetchindex()}
 
             # now let's get the actual artifacts
+            ts = {}
+            ps = {}
+            PT = qdb.metadata_template.prep_template.PrepTemplate
             qdb.sql_connection.TRN.add(sql, [tuple(artifact_ids)])
             for row in qdb.sql_connection.TRN.execute_fetchindex():
                 aid, name, cid, gt, aparams, dt, pid, pparams, filepaths, _, \
-                    target = row
+                    target, prep_template_id = row
 
                 # cleaning fields:
                 # - [0] due to the array_agg
@@ -1442,9 +1452,27 @@ def get_artifacts_information(artifact_ids, only_biom=True):
                     algorithm = '%s | %s' % (commands[cid], pparams)
 
+                if target is None:
+                    target = []
+                else:
+                    if target not in ts:
+                        qdb.sql_connection.TRN.add(sql_ts, [target])
+                        ts[target] = \
+                            qdb.sql_connection.TRN.execute_fetchflatten()
+                    target = ts[target]
+
+                if prep_template_id is None:
+                    prep_samples = 0
+                else:
+                    if prep_template_id not in ps:
+                        ps[prep_template_id] = len(list(
+                            PT(prep_template_id).keys()))
+                    prep_samples = ps[prep_template_id]
+
                 results.append({
                     'artifact_id': aid,
                     'target_subfragment': target,
+                    'prep_samples': prep_samples,
                     'name': name,
                     'data_type': dt,
                     'timestamp': str(gt),
@@ -1452,20 +1480,4 @@ def get_artifacts_information(artifact_ids, only_biom=True):
                     'algorithm': algorithm,
                     'files': filepaths})
 
-            # let's get the values for target_subfragment from the
-            # prep_template, note that we have to do it in a separate sql
-            # doing crosstab is really difficult and in another loop cause we
-            # need to loop over all execute_fetchindex before doing another
-            # query
-            ts = {}
-            for i, r in enumerate(results):
-                pid = r['target_subfragment']
-                if pid is None:
-                    results[i]['target_subfragment'] = []
-                else:
-                    if pid not in ts:
-                        qdb.sql_connection.TRN.add(sql_ts, [pid])
-                        ts[pid] = qdb.sql_connection.TRN.execute_fetchflatten()
-                    results[i]['target_subfragment'] = ts[pid]
-
             return results
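`get_artifacts_information` now resolves `target_subfragment` and the per-prep-template sample count inline, memoizing both in plain dicts (`ts`, `ps`) so each prep template is queried only once no matter how many artifacts share it. A toy sketch of that memoization pattern, with the expensive lookup stubbed out:

```python
# Sketch of the dict-based memoization used above; count_prep_samples
# stands in for len(list(PrepTemplate(prep_template_id).keys())).
calls = []

def count_prep_samples(prep_template_id):
    calls.append(prep_template_id)  # record each real lookup
    return 27

ps = {}
for prep_template_id in [1, 1, 2, None]:
    if prep_template_id is None:
        prep_samples = 0
    else:
        if prep_template_id not in ps:
            ps[prep_template_id] = count_prep_samples(prep_template_id)
        prep_samples = ps[prep_template_id]

print(calls)  # [1, 2] -- each prep template is only queried once
```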
diff --git a/qiita_pet/handlers/api_proxy/tests/test_artifact.py b/qiita_pet/handlers/api_proxy/tests/test_artifact.py
index 23ff33302..ec5d6d9a2 100644
--- a/qiita_pet/handlers/api_proxy/tests/test_artifact.py
+++ b/qiita_pet/handlers/api_proxy/tests/test_artifact.py
@@ -223,27 +223,34 @@ def test_artifact_get_info(self):
         obs = artifact_get_info('test@foo.bar', [5, 6, 7])
         data = [
             {'files': ['1_study_1001_closed_reference_otu_table_Silva.biom'],
-             'target_subfragment': ['V4'], 'algorithm': (
-                'Pick closed-reference OTUs, QIIMEv1.9.1 | barcode_type 8, '
-                'defaults'), 'artifact_id': 6, 'data_type': '16S',
-             'timestamp': '2012-10-02 17:30:00', 'parameters': {
-                'reference': 2, 'similarity': 0.97, 'sortmerna_e_value': 1,
+             'target_subfragment': ['V4'],
+             'algorithm': ('Pick closed-reference OTUs, QIIMEv1.9.1 | '
+                           'barcode_type 8, defaults'),
+             'artifact_id': 6, 'data_type': '16S',
+             'timestamp': '2012-10-02 17:30:00', 'prep_samples': 27,
+             'parameters': {
+                'reference': 2, 'similarity': 0.97, u'sortmerna_e_value': 1,
                 'sortmerna_max_pos': 10000, 'input_data': 2, 'threads': 1,
                 'sortmerna_coverage': 0.97}, 'name': 'BIOM'},
             {'files': ['1_study_1001_closed_reference_otu_table.biom'],
-             'target_subfragment': ['V4'], 'algorithm': (
-                'Pick closed-reference OTUs, QIIMEv1.9.1 | barcode_type 8, '
-                'defaults'), 'artifact_id': 5, 'data_type': '18S',
-             'timestamp': '2012-10-02 17:30:00', 'parameters': {
-                'reference': 1, 'similarity': 0.97, 'sortmerna_e_value': 1,
-                'sortmerna_max_pos': 10000, 'input_data': 2, 'threads': 1,
-                'sortmerna_coverage': 0.97}, 'name': 'BIOM'},
+             'target_subfragment': ['V4'],
+             'algorithm': ('Pick closed-reference OTUs, QIIMEv1.9.1 | '
+                           'barcode_type 8, defaults'),
+             'artifact_id': 5, 'data_type': '18S',
+             'timestamp': '2012-10-02 17:30:00', 'prep_samples': 27,
+             'parameters': {
+                'reference': 1, 'similarity': 0.97, 'sortmerna_e_value': 1,
+                'sortmerna_max_pos': 10000, 'input_data': 2, 'threads': 1,
+                'sortmerna_coverage': 0.97}, 'name': 'BIOM'},
             {'files': [], 'target_subfragment': ['V4'], 'algorithm': '',
              'artifact_id': 7, 'data_type': '16S',
-             'timestamp': '2012-10-02 17:30:00', 'parameters': {},
-             'name': 'BIOM'}]
+             'timestamp': '2012-10-02 17:30:00', 'prep_samples': 27,
+             'parameters': {}, 'name': 'BIOM'}]
         exp = {'status': 'success',
                'msg': '',
                'data': data}
-        self.assertEqual(obs, exp)
+        self.assertItemsEqual(obs.keys(), exp.keys())
+        self.assertEqual(obs['status'], exp['status'])
+        self.assertEqual(obs['msg'], exp['msg'])
+        self.assertItemsEqual(obs['data'], exp['data'])
 
     def test_artifact_post_req(self):
         # Create new prep template to attach artifact to
diff --git a/qiita_pet/handlers/api_proxy/tests/test_studies.py b/qiita_pet/handlers/api_proxy/tests/test_studies.py
index a838862ad..b461c47d7 100644
--- a/qiita_pet/handlers/api_proxy/tests/test_studies.py
+++ b/qiita_pet/handlers/api_proxy/tests/test_studies.py
@@ -50,18 +50,16 @@ def test_study_get_req(self):
         obs = study_get_req(1, 'test@foo.bar')
         exp = {
             'status': 'success',
-            'message': '',
             'study_info': {
-                'mixs_compliant': True,
-                'metadata_complete': True,
-                'reprocess': False,
-                'emp_person_id': 2,
-                'number_samples_promised': 27,
-                'funding': None,
-                'vamps_id': None,
+                'mixs_compliant': True, 'metadata_complete': True,
+                'reprocess': False, 'owner': 'test@foo.bar',
+                'emp_person_id': 2, 'number_samples_promised': 27,
+                'funding': None, 'show_biom_download_button': True,
+                'publication_pid': ['123456', '7891011'], 'vamps_id': None,
                 'first_contact': datetime(2014, 5, 19, 16, 10),
-                'timeseries_type_id': 1,
-                'study_abstract':
+                'ebi_submission_status': 'submitted',
+                'show_raw_download_button': True, 'timeseries_type_id': 1,
+                'study_abstract': (
                     'This is a preliminary study to examine the microbiota '
                     'associated with the Cannabis plant. Soils samples from '
                     'the bulk soil, soil associated with the roots, and the '
@@ -71,33 +69,25 @@ def test_study_get_req(self):
                     'from plants that had been harvested in the summer. '
                     'Future studies will attempt to analyze the soils and '
                     'rhizospheres from the same location at different time '
-                    'points in the plant lifecycle.',
-                'status': 'private',
-                'spatial_series': False,
-                'study_description': 'Analysis of the Cannabis Plant '
-                                     'Microbiome',
-                'shared_with': ['shared@foo.bar'],
-                'lab_person': {'affiliation': 'knight lab',
-                               'name': 'LabDude',
-                               'email': 'lab_dude@foo.bar'},
-                'principal_investigator': {'affiliation': 'Wash U',
-                                           'name': 'PIDude',
-                                           'email': 'PI_dude@foo.bar'},
-                'study_alias': 'Cannabis Soils',
-                'study_id': 1,
+                    'points in the plant lifecycle.'),
+                'status': 'private', 'spatial_series': False,
+                'study_description': (
+                    'Analysis of the Cannabis Plant Microbiome'),
+                'shared_with': ['shared@foo.bar'], 'publication_doi': [
+                    '10.100/123456', '10.100/7891011'],
+                'has_access_to_raw_data': True, 'lab_person': {
+                    'affiliation': 'knight lab', 'name': 'LabDude',
+                    'email': 'lab_dude@foo.bar'},
+                'principal_investigator': {
+                    'affiliation': 'Wash U', 'name': 'PIDude',
+                    'email': 'PI_dude@foo.bar'},
+                'study_alias': 'Cannabis Soils', 'study_id': 1,
                 'most_recent_contact': datetime(2014, 5, 19, 16, 11),
-                'publication_doi': ['10.100/123456', '10.100/7891011'],
-                'publication_pid': ['123456', '7891011'],
-                'num_samples': 27,
-                'study_title': 'Identification of the Microbiomes for '
-                               'Cannabis Soils',
-                'number_samples_collected': 27,
-                'owner': 'test@foo.bar',
-                'ebi_submission_status': 'submitted',
-                'has_access_to_raw_data': True,
-                'show_biom_download_button': True,
-                'show_raw_download_button': True,
-                'ebi_study_accession': 'EBI123456-BB'},
+                'ebi_study_accession': 'EBI123456-BB', 'num_samples': 27,
+                'study_title': (
+                    'Identification of the Microbiomes for Cannabis Soils'),
+                'number_samples_collected': 27},
+            'message': '',
             'editable': True}
 
         self.assertEqual(obs, exp)
@@ -139,6 +129,8 @@ def test_study_get_req(self):
             'study_description': 'DESC',
             'shared_with': [],
             'lab_person': None,
+            'study_alias': "FCM",
+            'owner': 'Dude',
             'principal_investigator': {'affiliation': 'Wash U',
                                        'name': 'PIDude',
                                        'email': 'PI_dude@foo.bar'},
diff --git a/qiita_pet/handlers/artifact_handlers/base_handlers.py b/qiita_pet/handlers/artifact_handlers/base_handlers.py
index d56c62c6a..c87cdbb48 100644
--- a/qiita_pet/handlers/artifact_handlers/base_handlers.py
+++ b/qiita_pet/handlers/artifact_handlers/base_handlers.py
@@ -216,6 +216,8 @@ def artifact_summary_get_request(user, artifact_id):
             'processing_jobs': processing_jobs,
             'summary': summary,
             'job': job_info,
+            'artifact_timestamp': artifact.timestamp.strftime(
+                "%Y-%m-%d %H:%M"),
             'errored_jobs': errored_jobs}
diff --git a/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py b/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py
index 5b34f639e..6136276db 100644
--- a/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py
+++ b/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py
@@ -92,6 +92,7 @@ def test_artifact_summary_get_request(self):
             (2L, '1_s_G1_L001_sequences_barcodes.fastq.gz (raw barcodes)')]
         exp = {'name': 'Raw data 1',
                'artifact_id': 1,
+               'artifact_timestamp': '2012-10-01 09:10',
                'visibility': 'private',
                'editable': True,
                'buttons': ('