diff --git a/qiita_db/environment_manager.py b/qiita_db/environment_manager.py index 1c5c0b13f..1e1c4a813 100644 --- a/qiita_db/environment_manager.py +++ b/qiita_db/environment_manager.py @@ -382,6 +382,10 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): Pulls the current patch from the settings table and applies all subsequent patches found in the patches directory. """ + # we are going to open and close 2 main transactions; this is a required + # change since patch 68.sql, where we transition to jsonb for all info + # files. The 2 main transactions are: (1) get the current settings, + # (2) apply each patch in its own independent transaction with qdb.sql_connection.TRN: qdb.sql_connection.TRN.add("SELECT current_patch FROM settings") current_patch = qdb.sql_connection.TRN.execute_fetchlast() @@ -398,19 +402,23 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): else: next_patch_index = sql_patch_files.index(current_sql_patch_fp) + 1 - patch_update_sql = "UPDATE settings SET current_patch = %s" + patch_update_sql = "UPDATE settings SET current_patch = %s" - for sql_patch_fp in sql_patch_files[next_patch_index:]: - sql_patch_filename = basename(sql_patch_fp) + for sql_patch_fp in sql_patch_files[next_patch_index:]: + sql_patch_filename = basename(sql_patch_fp) - # patch 43.sql is when we started testing patches - if sql_patch_filename == '43.sql' and test: - _populate_test_db() + py_patch_fp = corresponding_py_patch( + splitext(basename(sql_patch_fp))[0] + '.py') + py_patch_filename = basename(py_patch_fp) - py_patch_fp = corresponding_py_patch( - splitext(basename(sql_patch_fp))[0] + '.py') - py_patch_filename = basename(py_patch_fp) + # patch 43.sql is when we started testing patches; patch 68.sql is + # when we transitioned to jsonb for the info files, so let's do + # this in its own transaction + if sql_patch_filename == '68.sql' and test: + with qdb.sql_connection.TRN: + _populate_test_db() + with qdb.sql_connection.TRN: with open(sql_patch_fp, 'U') as patch_file: if verbose: print('\tApplying patch %s...' % sql_patch_filename) @@ -425,3 +433,11 @@ def patch(patches_dir=PATCHES_DIR, verbose=False, test=False): print('\t\tApplying python patch %s...' % py_patch_filename) execfile(py_patch_fp, {}) + + # before moving to jsonb for sample/prep info files (patch 69.sql), + # one of the patches used to regenerate the sample information file + # for the test Study (1), so a lot of the tests actually expect this. + # Now, trying to regenerate it directly in populate_test_db might + # require too many dev hours, so the easiest is to just do it here + if test and sql_patch_filename == '69.sql': + qdb.study.Study(1).sample_template.generate_files()
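For reviewers, here is a stand-alone sketch of the transaction layout this hunk introduces (illustrative only; `transaction` is a stand-in for `qdb.sql_connection.TRN`, and the actual SQL/py patch execution is elided):

```python
# Stand-alone sketch of the per-patch transaction layout above.
# `transaction` stands in for qdb.sql_connection.TRN.
from contextlib import contextmanager


@contextmanager
def transaction(name):
    print('BEGIN %s' % name)
    try:
        yield
        print('COMMIT %s' % name)
    except Exception:
        print('ROLLBACK %s' % name)
        raise


def apply_patches(patch_filenames, test=False):
    for patch in patch_filenames:
        # 68.sql moves the info files to jsonb, so when testing, the
        # test database has to be populated right before it, in its
        # own transaction
        if patch == '68.sql' and test:
            with transaction('populate test db'):
                pass  # _populate_test_db() in the real code
        # each patch commits (or rolls back) independently, so a
        # failing patch never takes the previous ones down with it
        with transaction(patch):
            pass  # run the .sql file and its optional .py companion


apply_patches(['67.sql', '68.sql', '69.sql'], test=True)
```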
diff --git a/qiita_db/meta_util.py b/qiita_db/meta_util.py index b4e11c486..35ab6e788 100644 --- a/qiita_db/meta_util.py +++ b/qiita_db/meta_util.py @@ -316,22 +316,25 @@ def get_lat_longs(): sql = """SELECT DISTINCT table_name FROM information_schema.columns WHERE table_name SIMILAR TO 'sample_[0-9]+' - AND table_schema = 'qiita' - AND column_name IN ('latitude', 'longitude') AND SPLIT_PART(table_name, '_', 2)::int IN %s - GROUP BY table_name HAVING COUNT(column_name) = 2;""" + AND table_schema = 'qiita'""" qdb.sql_connection.TRN.add(sql, [tuple(portal_table_ids)]) - sql = [('SELECT CAST(latitude AS FLOAT), ' - ' CAST(longitude AS FLOAT) ' - 'FROM qiita.%s ' - 'WHERE isnumeric(latitude) AND isnumeric(longitude) ' - "AND latitude <> 'NaN' " - "AND longitude <> 'NaN' " % s) + # we are going to create multiple union selects to retrieve the + # latitude and longitude of all available studies. Note that UNION in + # PostgreSQL automatically removes duplicates + sql_query = """ + SELECT CAST(sample_values->>'latitude' AS FLOAT), + CAST(sample_values->>'longitude' AS FLOAT) + FROM qiita.%s + WHERE isnumeric(sample_values->>'latitude') AND + isnumeric(sample_values->>'longitude')""" + sql = [sql_query % s for s in qdb.sql_connection.TRN.execute_fetchflatten()] sql = ' UNION '.join(sql) qdb.sql_connection.TRN.add(sql) + # note that the UNION above already removed any duplicates return qdb.sql_connection.TRN.execute_fetchindex() diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 194373be8..9e4b762c5 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -37,10 +37,10 @@ from __future__ import division from future.utils import viewitems -from future.builtins import zip from itertools import chain from copy import deepcopy from datetime import datetime +from json import loads, dumps import pandas as pd import numpy as np @@ -53,6 +53,11 @@ from string import letters, digits +# this is the name of the sample where we store all the columns of a +# sample/prep information file +QIITA_COLUMN_NAME = 'qiita_sample_column_names' + + class BaseSample(qdb.base.QiitaObject): r"""Sample object that accesses the db to get the information of a sample belonging to a PrepTemplate or a SampleTemplate.
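To make the new storage model concrete, here is a hand-written illustration (not actual database output) of what a dynamic table such as `qiita.sample_1` holds after this change; the example values are taken from the test data further down in this diff:

```python
# Every dynamic table now has exactly two columns: sample_id and a
# jsonb sample_values. The helper row QIITA_COLUMN_NAME
# ('qiita_sample_column_names') does the column bookkeeping, so we can
# list the categories without scanning every sample row.
sample_1 = {
    'qiita_sample_column_names': {
        'columns': ['latitude', 'longitude', 'sample_type'],  # etc.
    },
    '1.SKB8.640193': {
        'latitude': '74.0894932572',
        'longitude': '65.3283470202',
        'sample_type': 'ENVO:soil',
    },
}

# this layout is why get_lat_longs() above can cast straight out of
# the jsonb blob: SELECT CAST(sample_values->>'latitude' AS FLOAT), ...
```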
@@ -182,12 +187,17 @@ def _get_categories(self): set of str The set of all available metadata categories """ - # Get all the columns - cols = qdb.util.get_table_cols(self._dynamic_table) - # Remove the sample_id column as this column is used internally for - # data storage and it doesn't actually belong to the metadata - cols.remove('sample_id') - return set(cols) + with qdb.sql_connection.TRN: + sql = """SELECT sample_values->>'columns' + FROM qiita.{0} + WHERE sample_id = '{1}'""".format( + self._dynamic_table, QIITA_COLUMN_NAME) + qdb.sql_connection.TRN.add(sql) + results = qdb.sql_connection.TRN.execute_fetchflatten() + if results: + results = loads(results[0]) + + return set(results) def _to_dict(self): r"""Returns the categories and their values in a dictionary @@ -198,15 +208,14 @@ def _to_dict(self): A dictionary of the form {category: value} """ with qdb.sql_connection.TRN: - sql = "SELECT * FROM qiita.{0} WHERE sample_id=%s".format( - self._dynamic_table) + sql = """SELECT sample_values + FROM qiita.{0} + WHERE sample_id=%s""".format(self._dynamic_table) qdb.sql_connection.TRN.add(sql, [self._id]) - d = dict(qdb.sql_connection.TRN.execute_fetchindex()[0]) - # Remove the sample_id, is not part of the metadata - del d['sample_id'] + result = qdb.sql_connection.TRN.execute_fetchindex() - return d + return result[0]['sample_values'] def __len__(self): r"""Returns the number of metadata categories @@ -249,9 +258,12 @@ def __getitem__(self, key): "Metadata category %s does not exists for sample %s" " in template %d" % (key, self._id, self._md_template.id)) - sql = """SELECT {0} FROM qiita.{1} - WHERE sample_id=%s""".format(key, self._dynamic_table) + sql = """SELECT sample_values->>'{0}' as {0} + FROM qiita.{1} + WHERE sample_id = %s""".format( + key, self._dynamic_table) qdb.sql_connection.TRN.add(sql, [self._id]) + return qdb.sql_connection.TRN.execute_fetchlast() def setitem(self, column, value): @@ -278,9 +290,11 @@ def setitem(self, column, value): (column, self._dynamic_table)) sql = """UPDATE qiita.{0} - SET {1}=%s - WHERE sample_id=%s""".format(self._dynamic_table, column) - qdb.sql_connection.TRN.add(sql, [value, self._id]) + SET sample_values = sample_values || %s + WHERE sample_id = %s""".format(self._dynamic_table) + + qdb.sql_connection.TRN.add(sql, [dumps({column: value}), self.id]) + qdb.sql_connection.TRN.execute() def __setitem__(self, column, value): r"""Sets the metadata value for the category `column` @@ -597,19 +611,20 @@ def _common_creation_steps(cls, md_template, obj_id): # Create table with custom columns table_name = cls._table_name(obj_id) - column_datatype = ["%s varchar" % col for col in headers] sql = """CREATE TABLE qiita.{0} ( - sample_id varchar NOT NULL, {1} - )""".format(table_name, ', '.join(column_datatype)) + sample_id VARCHAR NOT NULL PRIMARY KEY, + sample_values JSONB NOT NULL)""".format(table_name) qdb.sql_connection.TRN.add(sql) - values = [list(md_template[h]) for h in headers] - values.insert(0, sample_ids) - values = [list(v) for v in zip(*values)] - sql = """INSERT INTO qiita.{0} (sample_id, {1}) - VALUES (%s, {2})""".format( - table_name, ", ".join(headers), - ', '.join(["%s"] * len(headers))) + values = dumps({"columns": md_template.columns.tolist()}) + sql = """INSERT INTO qiita.{0} (sample_id, sample_values) + VALUES ('{1}', %s)""".format( + table_name, QIITA_COLUMN_NAME) + qdb.sql_connection.TRN.add(sql, [values]) + + values = [(k, df.to_json()) for k, df in md_template.iterrows()] + sql = """INSERT INTO qiita.{0} (sample_id, 
sample_values) + VALUES (%s, %s)""".format(table_name) qdb.sql_connection.TRN.add(sql, values, many=True) # Execute all the steps @@ -625,17 +640,28 @@ def metadata_headers(cls): Alphabetical list of all metadata headers available """ with qdb.sql_connection.TRN: - sql = """SELECT DISTINCT column_name - FROM information_schema.columns - WHERE table_name LIKE '{0}%' AND - table_name != 'sample_template_filepath' AND - table_name != 'prep_template_filepath' AND - table_name != 'prep_template_sample' AND - table_name != 'prep_template_processing_job' AND - table_name != 'prep_template' - ORDER BY column_name""".format(cls._table_prefix) + sql = """SELECT DISTINCT table_name + FROM information_schema.columns + WHERE table_name LIKE '{0}%' AND + table_name != 'sample_template_filepath' AND + table_name != 'prep_template_filepath' AND + table_name != 'prep_template_sample' AND + table_name != 'prep_template_processing_job' AND + table_name != 'prep_template'""".format( + cls._table_prefix) qdb.sql_connection.TRN.add(sql) - return qdb.sql_connection.TRN.execute_fetchflatten() + tables = qdb.sql_connection.TRN.execute_fetchflatten() + sql = """SELECT sample_values->>'columns' + FROM qiita.%s WHERE sample_id = '{0}'""".format( + QIITA_COLUMN_NAME) + results = [] + for t in tables: + qdb.sql_connection.TRN.add(sql % t) + vals = qdb.sql_connection.TRN.execute_fetchflatten() + if vals: + results.extend(loads(vals[0])) + + return list(set(results)) def _common_delete_sample_steps(self, sample_names): r"""Executes the common delete sample steps @@ -705,9 +731,24 @@ def delete_column(self, column_name): column_name) with qdb.sql_connection.TRN: - sql = 'ALTER TABLE qiita.%s%d DROP COLUMN %s' % ( - self._table_prefix, self._id, column_name) - qdb.sql_connection.TRN.add(sql) + table_name = 'qiita.{0}{1}'.format(self._table_prefix, self._id) + # deleting from all samples; note that (-) in pgsql jsonb means + # delete that key and value + sql = """UPDATE {0} + SET sample_values = sample_values - %s + WHERE sample_id != %s""".format(table_name) + qdb.sql_connection.TRN.add(sql, [column_name, QIITA_COLUMN_NAME]) + + # deleting from QIITA_COLUMN_NAME + columns = self.categories() + columns.remove(column_name) + values = '{"columns": %s}' % dumps(columns) + sql = """UPDATE {0} + SET sample_values = %s + WHERE sample_id = '{1}'""".format( + table_name, QIITA_COLUMN_NAME) + qdb.sql_connection.TRN.add(sql, [values]) + qdb.sql_connection.TRN.execute() self.generate_files() @@ -793,56 +834,54 @@ def _common_extend_steps(self, md_template): # code). Sorting the new columns to enforce an order new_cols = sorted(new_cols) - sql_alter = """ALTER TABLE qiita.{0} ADD COLUMN {1} {2}""" - for category in new_cols: - qdb.sql_connection.TRN.add( - sql_alter.format(table_name, category, 'varchar')) + cols = self.categories() + cols.extend(new_cols) + + values = dumps({"columns": cols}) + sql = """UPDATE qiita.{0} + SET sample_values = %s + WHERE sample_id = '{1}'""".format( + table_name, QIITA_COLUMN_NAME) + qdb.sql_connection.TRN.add(sql, [values]) if existing_samples: # The values for the new columns are the only ones that get # added to the database. None of the existing values will - # be modified (see update for that functionality) + # be modified (see update for that functionality). 
Remember + that || is the jsonb operator used to update or add a new key/value md_filtered = md_template[new_cols].loc[existing_samples] - values = [list(md_filtered[h]) for h in md_filtered] - values.append(existing_samples) - values = [list(v) for v in zip(*values)] - # psycopg2 requires a list of iterable, in which each - # iterable is a set of values to use in the string - # formatting of the query. We have all the values in - # different lists (but in the same order) so use zip to - # create the list of iterable that psycopg2 requires. - set_str = ["{0} = %s".format(col) for col in new_cols] - sql = """UPDATE qiita.{0} - SET {1} - WHERE sample_id=%s""".format(table_name, - ",".join(set_str)) - for v in values: - qdb.sql_connection.TRN.add(sql, v) + for sid, df in md_filtered.iterrows(): + values = dict(df.iteritems()) + sql = """UPDATE qiita.{0} + SET sample_values = sample_values || %s + WHERE sample_id = %s""".format( + self._table_name(self._id)) + qdb.sql_connection.TRN.add(sql, [dumps(values), sid]) if new_samples: warnings.warn( "The following samples have been added to the existing" " template: %s" % ", ".join(new_samples), qdb.exceptions.QiitaDBWarning) + new_samples = sorted(new_samples) + # At this point we only want the information # from the new samples md_filtered = md_template.loc[new_samples] - # Insert new_samples in the study table + + # Insert the new samples into the study sample table values = [[self._id, s_id] for s_id in new_samples] sql = """INSERT INTO qiita.{0} ({1}, sample_id) VALUES (%s, %s)""".format(self._table, self._id_column) qdb.sql_connection.TRN.add(sql, values, many=True) - # Insert values on custom table - values = [list(md_filtered[h]) for h in md_filtered] - values.insert(0, new_samples) - values = [list(v) for v in zip(*values)] - sql = """INSERT INTO qiita.{0} (sample_id, {1}) - VALUES (%s, {2})""".format( - table_name, ", ".join(headers), - ', '.join(["%s"] * len(headers))) + # inserting the new samples into the info file + values = [(k, row.to_json()) + for k, row in md_filtered.iterrows()] + sql = """INSERT INTO qiita.{0} (sample_id, sample_values) + VALUES (%s, %s)""".format(table_name) qdb.sql_connection.TRN.add(sql, values, many=True) # Execute all the steps
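A minimal sketch of the merge semantics the `||` update relies on; Python's `dict.update` mirrors what PostgreSQL does server-side with `sample_values = sample_values || %s` (the stored values here are hand-picked from the test data):

```python
from json import dumps, loads

# what is already stored in sample_values for one sample
stored = {'barcode': 'CCTCTGAGAGCT', 'emp_status': 'EMP'}
# what TRN.add() sends as the %s parameter
payload = dumps({'new_col': 'val1'})

# jsonb || merges the right-hand object into the left-hand one,
# adding new keys and overwriting existing ones
merged = dict(stored)
merged.update(loads(payload))

assert merged == {'barcode': 'CCTCTGAGAGCT', 'emp_status': 'EMP',
                  'new_col': 'val1'}
```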
@@ -1105,20 +1144,41 @@ def to_dataframe(self): """ with qdb.sql_connection.TRN: # Retrieve all the information from the database - sql = "SELECT * FROM qiita.{0}".format(self._table_name(self._id)) + cols = self.categories() + sql = """SELECT sample_id, sample_values + FROM qiita.{0} + WHERE sample_id != '{1}'""".format( + self._table_name(self._id), QIITA_COLUMN_NAME) qdb.sql_connection.TRN.add(sql) - meta = qdb.sql_connection.TRN.execute_fetchindex() - - # When we create the dataframe, we are providing a matrix (list of - # lists) with all the data, so we need to make sure that all the - # rows contain the columns in the same order - cols = sorted(meta[0].keys()) - meta_matrix = [[r[c] for c in cols] for r in meta] - df = pd.DataFrame(meta_matrix, columns=cols, dtype=str) + # this query is going to return a tuple + # (sample_id, dict of columns/values); however, it's important to + # notice that we can't guarantee that all column/value pairs are + # the same for all samples, as we are not doing full bookkeeping + # of all the columns in all the samples. Thus, we have 2 options: + # 1. use dict() on the query result with pd.DataFrame.from_dict so + # pandas deals with this; but this takes a crazy amount of time, + # for more info google: "performance pandas from_dict" + # 2. generate a matrix of rows/samples and cols/values and load it + # via pandas.DataFrame, which actually has good performance + data = [] + for sid, values in qdb.sql_connection.TRN.execute_fetchindex(): + # creating the row of values; first insert the sample id + vals = [sid] + # then loop over all the possible values, making sure that if + # the column doesn't exist in that sample, it gets a None + for c in cols: + v = None + if c in values: + v = values[c] + vals.append(v) + # append the row to the full matrix + data.append(vals) + cols.insert(0, 'sample_id') + df = pd.DataFrame(data, columns=cols, dtype=str) + df.set_index('sample_id', inplace=True) # Make sure that we are changing np.NaN by Nones df.where((pd.notnull(df)), None) - df.set_index('sample_id', inplace=True, drop=True) id_column_name = 'qiita_%sid' % (self._table_prefix) if id_column_name == 'qiita_sample_id': id_column_name = 'qiita_study_id' @@ -1155,18 +1215,26 @@ def get_filepaths(self): sort='descending')] def categories(self): - """Identifies the metadata columns present in a template + """Identifies the metadata columns present in an info file Returns ------- cols : list - The static and dynamic category fields - + The category fields """ - cols = qdb.util.get_table_cols(self._table_name(self._id)) - cols.remove("sample_id") + with qdb.sql_connection.TRN: + sql = """SELECT sample_values->>'columns' + FROM qiita.{0} + WHERE sample_id = '{1}'""".format( + self._table_name(self._id), QIITA_COLUMN_NAME) + + qdb.sql_connection.TRN.add(sql) - return cols + results = qdb.sql_connection.TRN.execute_fetchflatten() + if results: + results = sorted(loads(results[0])) + + return results def extend(self, md_template): """Adds the given template to the current one @@ -1239,7 +1307,7 @@ def _update(self, md_template): # columns. We only have 1 column, which holds if that # (sample, column) pair has been modified or not (i.e. cell) ne_stacked = diff_map.stack() - # by using ne_stacked to index himself, we get only the columns + # by using ne_stacked to index itself, we get only the columns # that did change (see boolean indexing in pandas docs) changed = ne_stacked[ne_stacked] if changed.empty: @@ -1254,48 +1322,55 @@ def _update(self, md_template): # a numpy array with only the values that actually changed # between the current_map and md_template changed_to = md_template.values[np.where(diff_map)] - - # to_update is a MultiIndexed DataFrame, in which the index 0 is - # the samples and the index 1 is the columns, we define these - # variables here so we don't put magic numbers across the code - sample_idx = 0 - col_idx = 1 + # now we are going to take that map and create a new DataFrame + # which is going to have a double level index (sample_id / + # column_name) with a single column 'to'; this will look something + # like: + # to + # sample_name column + # XX.Sample1 sample_type 6 + # XX.Sample2 sample_type 5 + # host_subject_id the only one + # XX.Sample3 sample_type 10 + # physical_specimen_location new location to_update = pd.DataFrame({'to': changed_to}, index=changed.index) - - # Get the columns that we need to change - indices = list(set(to_update.index.labels[col_idx])) - cols_to_update = to_update.index.levels[col_idx][indices] - - if not self.can_be_updated(columns=set(cols_to_update)): - raise qdb.exceptions.QiitaDBError( - 'The new template is modifying fields that cannot be ' - 'modified. Try removing the restricted fields or ' - 'deleting the processed data. You are trying to modify: %s' - % ', '.join(cols_to_update))
- # Get the samples that we need to change - indices = list(set(to_update.index.labels[sample_idx])) - samples_to_update = to_update.index.levels[sample_idx][indices] - - sql_eq_cols = ', '.join( - ["{0} = c.{0}".format(col) for col in cols_to_update]) - # We add 1 because we need to add the sample name - single_value = "(%s)" % ', '.join( - ["%s"] * (len(cols_to_update) + 1)) - sql_cols = ', '.join(cols_to_update) - - sql = """UPDATE qiita.{0} AS t SET - {1} - FROM (VALUES {2}) - AS c(sample_id, {3}) - WHERE c.sample_id = t.sample_id - """.format(self._table_name(self._id), sql_eq_cols, - single_value, sql_cols) - for sample in samples_to_update: - sample_vals = [md_template[col][sample] - for col in cols_to_update] - sample_vals.insert(0, sample) - qdb.sql_connection.TRN.add(sql, sample_vals) + # reset_index will expand the multi-index and convert the example + # to: + # sample_name column to + # 0 XX.Sample1 sample_type 6 + # 1 XX.Sample2 sample_type 5 + # 2 XX.Sample2 host_subject_id the only one + # 3 XX.Sample3 sample_type 10 + # 4 XX.Sample3 physical_specimen_location new location + to_update.reset_index(inplace=True) + new_columns = [] + for sid, df in to_update.groupby('sample_name'): + # keeping just the 'column' and 'to' columns, and then using + # 'column' as the index, will generate this for XX.Sample2: + # to + # column + # sample_type 5 + # host_subject_id the only one + df = df[['column', 'to']].set_index('column') + # finally, to_dict on XX.Sample2 gives: + # {'to': {'host_subject_id': 'the only one', + # 'sample_type': '5'}} + values = df.to_dict()['to'] + new_columns.extend(values.keys()) + sql = """UPDATE qiita.{0} + SET sample_values = sample_values || %s + WHERE sample_id = %s""".format( + self._table_name(self._id)) + qdb.sql_connection.TRN.add(sql, [dumps(values), sid]) + + new_columns = list(set(new_columns).union(set(self.categories()))) + table_name = self._table_name(self.id) + values = dumps({"columns": new_columns}) + sql = """UPDATE qiita.{0} + SET sample_values = %s + WHERE sample_id = '{1}'""".format( + table_name, QIITA_COLUMN_NAME) + qdb.sql_connection.TRN.add(sql, [values]) qdb.sql_connection.TRN.execute() @@ -1397,9 +1472,12 @@ def get_category(self, category): If category is not part of the template """ with qdb.sql_connection.TRN: - qdb.util.check_table_cols([category], self._table_name(self._id)) - sql = 'SELECT sample_id, {0} FROM qiita.{1}'.format( - category, self._table_name(self._id)) + if category not in self.categories(): + raise qdb.exceptions.QiitaDBColumnError(category) + sql = """SELECT sample_id, sample_values->>'{0}' as {0} + FROM qiita.{1} + WHERE sample_id != '{2}'""".format( + category, self._table_name(self._id), QIITA_COLUMN_NAME) qdb.sql_connection.TRN.add(sql) return dict(qdb.sql_connection.TRN.execute_fetchindex())
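The reshaping in `_update` can be tried in isolation; this stand-alone snippet reproduces the worked example from the comments above (the values and column names follow that example):

```python
import pandas as pd

# the stacked diff map: one (sample, column) pair per changed cell
index = pd.MultiIndex.from_tuples(
    [('XX.Sample1', 'sample_type'),
     ('XX.Sample2', 'sample_type'),
     ('XX.Sample2', 'host_subject_id')],
    names=['sample_name', 'column'])
to_update = pd.DataFrame({'to': ['6', '5', 'the only one']},
                         index=index)
to_update.reset_index(inplace=True)

for sid, df in to_update.groupby('sample_name'):
    values = df[['column', 'to']].set_index('column').to_dict()['to']
    # `values` is the dict that gets dumps()'d into the || update;
    # for XX.Sample2 it is:
    # {'sample_type': '5', 'host_subject_id': 'the only one'}
    print('%s %s' % (sid, values))
```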
diff --git a/qiita_db/metadata_template/prep_template.py b/qiita_db/metadata_template/prep_template.py index 70cf2c094..b93d192e1 100644 --- a/qiita_db/metadata_template/prep_template.py +++ b/qiita_db/metadata_template/prep_template.py @@ -21,7 +21,8 @@ import qiita_db as qdb from .constants import (PREP_TEMPLATE_COLUMNS, TARGET_GENE_DATA_TYPES, PREP_TEMPLATE_COLUMNS_TARGET_GENE) -from .base_metadata_template import BaseSample, MetadataTemplate +from .base_metadata_template import ( + BaseSample, MetadataTemplate, QIITA_COLUMN_NAME) def _check_duplicated_columns(prep_cols, sample_cols): @@ -94,7 +95,8 @@ class PrepTemplate(MetadataTemplate): _forbidden_words = { 'sampleid', 'qiita_study_id', - 'qiita_prep_id'} + 'qiita_prep_id', + QIITA_COLUMN_NAME} @classmethod def create(cls, md_template, study, data_type, investigation_type=None, diff --git a/qiita_db/metadata_template/sample_template.py b/qiita_db/metadata_template/sample_template.py index 0c6dd41d3..71d5b74f0 100644 --- a/qiita_db/metadata_template/sample_template.py +++ b/qiita_db/metadata_template/sample_template.py @@ -13,7 +13,8 @@ from qiita_core.exceptions import IncompetentQiitaDeveloperError import qiita_db as qdb -from .base_metadata_template import BaseSample, MetadataTemplate +from .base_metadata_template import ( + BaseSample, MetadataTemplate, QIITA_COLUMN_NAME) class Sample(BaseSample): @@ -68,7 +69,8 @@ class SampleTemplate(MetadataTemplate): 'run_prefix', 'sampleid', 'qiita_study_id', - 'qiita_prep_id'} + 'qiita_prep_id', + QIITA_COLUMN_NAME} @classmethod def create(cls, md_template, study): diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index 009c8addd..3c0dac74f 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -256,7 +256,7 @@ def test_metadata_headers(self): 'experiment_title', 'illumina_technology', 'instrument_model', 'library_construction_protocol', 'pcr_primers', 'platform', 'primer', 'run_center', 'run_date', 'run_prefix', 'samp_size', - 'sample_center', 'sample_id', 'sequencing_meth', 'study_center', + 'sample_center', 'sequencing_meth', 'study_center', 'target_gene', 'target_subfragment'] self.assertItemsEqual(obs, exp) @@ -280,6 +280,12 @@ def test_delitem(self): @qiita_test_checker() class TestPrepTemplate(TestCase): def setUp(self): + # qdb.metadata_template.base_metadata_template.QIITA_COLUMN_NAME is + # the name of the sample where we store all the columns of a + # sample/prep information file; in these tests we want to avoid + # having to import it in different places, so we add it here in setUp + self.QCN = \ + qdb.metadata_template.base_metadata_template.QIITA_COLUMN_NAME self.metadata_dict = { 'SKB8.640193': {'center_name': 'ANL', 'center_project_name': 'Test Project', @@ -847,7 +853,9 @@ def test_create_shorter_prep_template(self): self.metadata, self.test_study, self.data_type) obs = self.conn_handler.execute_fetchall( - "SELECT sample_id FROM qiita.prep_%d" % pt.id) + """SELECT sample_id + FROM qiita.prep_%d + WHERE sample_id != '%s'""" % (pt.id, self.QCN)) exp = [['1.SKB8.640193'], ['1.SKD8.640184']] self.assertEqual(obs, exp) @@ -1275,9 +1283,11 @@ def test_extend_add_cols(self): npt.assert_warns( qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata) - sql = "SELECT * FROM qiita.prep_{0}".format(pt.id) - obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)] - exp = [{'sample_id': '1.SKB7.640196', + sql = """SELECT * + FROM qiita.prep_{0} + WHERE sample_id != '{1}'""".format(pt.id, self.QCN) + obs = dict(self.conn_handler.execute_fetchall(sql)) + exp = {'1.SKB7.640196': { 'barcode': 'CCTCTGAGAGCT', 'ebi_submission_accession': None, 'experiment_design_description': 'BBBB', 'library_construction_protocol': 'AAAA', 'primer': 'GTGCCAGCMGCCGCGGTAA', 'platform': 'Illumina', 'instrument_model': 'Illumina MiSeq', 'str_column': 'Value for sample 3', 'run_prefix': 's_G1_L002_sequences', 'center_name': 'ANL', 'center_project_name': 'Test Project', 'emp_status': 'EMP', 'new_col': 'val1'}, - {'sample_id': '1.SKB8.640193', + '1.SKB8.640193': { 'barcode': 'GTCCGCAAGTTA', 'ebi_submission_accession': None, 'experiment_design_description': 'BBBB', 'library_construction_protocol': 'AAAA', 'primer': 'GTGCCAGCMGCCGCGGTAA', 'platform': 'Illumina', 'instrument_model': 'Illumina MiSeq', 'str_column': 'Value for sample 1', 'run_prefix': 's_G1_L001_sequences', 'center_name': 'ANL', 'center_project_name': 'Test Project', 'emp_status': 'EMP', 'new_col': 'val2'}, - {'sample_id': 
'1.SKD8.640184', + '1.SKD8.640184': { 'barcode': 'CGTAGAGCTCTC', 'ebi_submission_accession': None, 'experiment_design_description': 'BBBB', @@ -1318,8 +1328,7 @@ def test_extend_add_cols(self): 'center_name': 'ANL', 'center_project_name': 'Test Project', 'emp_status': 'EMP', - 'new_col': 'val3'}] - + 'new_col': 'val3'}} self.assertItemsEqual(obs, exp) def test_extend_update(self): @@ -1332,9 +1341,11 @@ def test_extend_update(self): npt.assert_warns( qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata) - sql = "SELECT * FROM qiita.prep_{0}".format(pt.id) - obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)] - exp = [{'sample_id': '1.SKB7.640196', + sql = """SELECT * + FROM qiita.prep_{0} + WHERE sample_id != '{1}'""".format(pt.id, self.QCN) + obs = dict(self.conn_handler.execute_fetchall(sql)) + exp = {'1.SKB7.640196': { 'barcode': 'CCTCTGAGAGCT', 'ebi_submission_accession': None, 'experiment_design_description': 'BBBB', @@ -1348,7 +1359,7 @@ def test_extend_update(self): 'center_project_name': 'Test Project', 'emp_status': 'EMP', 'new_col': 'val1'}, - {'sample_id': '1.SKB8.640193', + '1.SKB8.640193': { 'barcode': 'GTCCGCAAGTTA', 'ebi_submission_accession': None, 'experiment_design_description': 'BBBB', @@ -1362,7 +1373,7 @@ def test_extend_update(self): 'center_project_name': 'Test Project', 'emp_status': 'EMP', 'new_col': 'val2'}, - {'sample_id': '1.SKD8.640184', + '1.SKD8.640184': { 'barcode': 'CGTAGAGCTCTC', 'ebi_submission_accession': None, 'experiment_design_description': 'BBBB', @@ -1375,7 +1386,7 @@ def test_extend_update(self): 'center_name': 'ANL', 'center_project_name': 'Test Project', 'emp_status': 'EMP', - 'new_col': 'val3'}] + 'new_col': 'val3'}} self.assertItemsEqual(obs, exp) diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index 03539fd4c..243117c13 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -483,7 +483,7 @@ def test_metadata_headers(self): 'elevation', 'env_biome', 'env_feature', 'host_subject_id', 'host_taxid', 'latitude', 'longitude', 'ph', 'physical_specimen_location', 'physical_specimen_remaining', - 'samp_salinity', 'sample_id', 'sample_type', 'scientific_name', + 'samp_salinity', 'sample_type', 'scientific_name', 'season_environment', 'taxon_id', 'temp', 'texture', 'tot_nitro', 'tot_org_carb', 'water_content_soil'] self.assertItemsEqual(obs, exp) @@ -1011,7 +1011,7 @@ def test_create(self): 'taxon_id': '9606', 'scientific_name': 'homo sapiens'}} for s_id in exp_sample_ids: - self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) + self.assertDictEqual(st[s_id]._to_dict(), exp_dict[s_id]) exp = {"%s.Sample1" % new_id: None, "%s.Sample2" % new_id: None, "%s.Sample3" % new_id: None} @@ -1343,10 +1343,13 @@ def test_update_numpy(self): sql = "SELECT * FROM qiita.sample_{0}".format(st.id) obs = self.conn_handler.execute_fetchall(sql) - exp = [['%s.Sample1' % self.new_study.id, 'false', - '2015-09-01 00:00:00'], - ['%s.Sample2' % self.new_study.id, 'true', - '2015-09-01 00:00:00']] + exp = [ + ['%s.Sample2' % self.new_study.id, { + 'bool_col': 'true', 'date_col': '2015-09-01 00:00:00'}], + ['%s.Sample1' % self.new_study.id, { + 'bool_col': 'false', 'date_col': '2015-09-01 00:00:00'}], + ['qiita_sample_column_names', { + 'columns': ['bool_col', 'date_col']}]] self.assertEqual(sorted(obs), sorted(exp)) def test_generate_files(self): @@ -1878,8 +1881,7 @@ def 
test_to_dataframe(self): 'physical_specimen_remaining': 'true', 'dna_extracted': 'true', 'sample_type': 'type1', - 'collection_timestamp': - '2014-05-29 12:24:15', + 'collection_timestamp': '2014-05-29 12:24:15', 'host_subject_id': 'NotIdentified', 'description': 'Test Sample 1', 'latitude': '42.42', @@ -1892,8 +1894,7 @@ def test_to_dataframe(self): 'physical_specimen_remaining': 'true', 'dna_extracted': 'true', 'sample_type': 'type1', - 'collection_timestamp': - '2014-05-29 12:24:15', + 'collection_timestamp': '2014-05-29 12:24:15', 'host_subject_id': 'NotIdentified', 'description': 'Test Sample 2', 'latitude': '4.2', @@ -1906,8 +1907,7 @@ def test_to_dataframe(self): 'physical_specimen_remaining': 'true', 'dna_extracted': 'true', 'sample_type': 'type1', - 'collection_timestamp': - '2014-05-29 12:24:15', + 'collection_timestamp': '2014-05-29 12:24:15', 'host_subject_id': 'NotIdentified', 'description': 'Test Sample 3', 'latitude': '4.8', @@ -1922,7 +1922,7 @@ def test_to_dataframe(self): obs.sort_index(axis=1, inplace=True) exp.sort_index(axis=0, inplace=True) exp.sort_index(axis=1, inplace=True) - assert_frame_equal(obs, exp) + assert_frame_equal(obs, exp, check_column_type=False) obs = self.tester.to_dataframe() # We don't test the specific values as this would blow up the size diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 09ddb2042..3ef970c9f 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -40,7 +40,7 @@ def prefix_sample_names_with_id(md_template, study_id): else '%d.%s' % (study_id, idx) for idx in md_template.index], index=md_template.index) - # get the rows that are gonna change + # get the rows that are going to change changes = len(md_template.index[ md_template['qiita_sample_name_with_id'] != md_template.index]) if changes != 0 and changes != len(md_template.index): diff --git a/qiita_db/processing_job.py b/qiita_db/processing_job.py index 98db5b008..82c84d2b6 100644 --- a/qiita_db/processing_job.py +++ b/qiita_db/processing_job.py @@ -138,7 +138,7 @@ def create(cls, user, parameters, force=False): Notes ----- - If force is True the job is gonna be created even if another job + If force is True the job is going to be created even if another job exists with the same parameters """ TTRN = qdb.sql_connection.TRN diff --git a/qiita_db/support_files/patches/68.sql b/qiita_db/support_files/patches/68.sql new file mode 100644 index 000000000..2a0bcbe4a --- /dev/null +++ b/qiita_db/support_files/patches/68.sql @@ -0,0 +1,47 @@ +-- November 21, 2018 +-- moving sample and prep info files to jsonb + +-- Due to error: +-- ValueError: Error running SQL: OUT_OF_MEMORY. MSG: out of shared memory +-- HINT: You might need to increase max_locks_per_transaction. +-- we need to split the full patch in 4 so the continuation is 69.sql, +-- 70.sql and 71.sql + +-- 1/4 Sample template +DO $do$ +DECLARE + dyn_t varchar; + dyn_table varchar; + dyn_table_bk varchar; + sid varchar; +BEGIN + FOR dyn_t IN + SELECT DISTINCT table_name + FROM information_schema.columns + WHERE SUBSTR(table_name, 1, 7) = 'sample_' + AND table_schema = 'qiita' + AND table_name != 'sample_template_filepath' + LOOP + dyn_table := 'qiita.' 
|| dyn_t; dyn_table_bk := dyn_t || '_bk'; + + -- rename the tables so we can move the data later + EXECUTE format('ALTER TABLE %1$s RENAME TO %2$s', dyn_table, dyn_table_bk); + + -- create the new table; note that there are no constraints, so the + -- inserts go fast, but we will add them later + EXECUTE format('CREATE TABLE %1$s (sample_id VARCHAR NOT NULL, sample_values JSONB)', dyn_table); + + -- inserting our helper row qiita_sample_column_names, which is going to keep all our columns; this is much easier than trying to keep all rows with the same values + EXECUTE 'INSERT INTO ' || dyn_table || ' (sample_id, sample_values) VALUES (''qiita_sample_column_names'', (''{"columns":'' || (SELECT json_agg(column_name::text) FROM information_schema.columns WHERE table_name=''' || dyn_table_bk || ''' AND table_schema=''qiita'' AND column_name != ''sample_id'')::text || ''}'')::json);'; -- inserting the table values row by row; this might take a long time + FOR sid IN + EXECUTE 'SELECT sample_id FROM qiita.' || dyn_table_bk + LOOP + EXECUTE 'INSERT INTO ' || dyn_table || ' (sample_id, sample_values) VALUES (''' || sid || ''', (SELECT row_to_json(t)::jsonb - ''sample_id'' FROM (SELECT * FROM qiita.' || dyn_table_bk || ' WHERE sample_id = ''' || sid || ''') t));'; + END LOOP; + + -- adding the primary key (which also creates its index) + EXECUTE 'ALTER TABLE ' || dyn_table || ' ADD CONSTRAINT pk_jsonb_' || dyn_t || ' PRIMARY KEY ( sample_id );'; + END LOOP; +END $do$;
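To make the per-row migration concrete: for every sample, the DO block selects the old wide row, converts it with `row_to_json`, strips the `sample_id` key, and stores the remainder as the jsonb payload. A hand-written before/after illustration (not actual psql output), using one row from the test data:

```python
# before: one wide row in sample_1_bk, one column per category
#   sample_id     | season_environment | latitude      | ...
#   1.SKM7.640188 | winter             | 60.1102854322 | ...

# after: a two-column row in the rebuilt sample_1
row = {
    'sample_id': '1.SKM7.640188',
    # row_to_json(t)::jsonb - 'sample_id'
    'sample_values': {
        'season_environment': 'winter',
        'latitude': '60.1102854322',
        # ... every other former column, keyed by column name
    },
}
```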
diff --git a/qiita_db/support_files/patches/69.sql b/qiita_db/support_files/patches/69.sql new file mode 100644 index 000000000..50cca0937 --- /dev/null +++ b/qiita_db/support_files/patches/69.sql @@ -0,0 +1,49 @@ +-- November 21, 2018 +-- moving sample and prep info files to jsonb + +-- 2/4 This is the continuation of 68.sql; let's move the data for the +-- prep templates, but only for prep ids < 3500 +DO $do$ +DECLARE + dyn_t varchar; + dyn_table varchar; + dyn_table_bk varchar; + sid varchar; +BEGIN + FOR dyn_t IN + SELECT DISTINCT table_name + FROM information_schema.columns + WHERE SUBSTR(table_name, 1, 5) = 'prep_' + AND table_schema = 'qiita' + AND table_name NOT IN ('prep_template', + 'prep_template_preprocessed_data', + 'prep_template_filepath', + 'prep_columns', + 'prep_template_processing_job', + 'prep_template_sample') + AND SUBSTR(table_name, 6)::INT < 3500 + LOOP + dyn_table := 'qiita.' || dyn_t; + dyn_table_bk := dyn_t || '_bk'; + + -- rename the tables so we can move the data later + EXECUTE format('ALTER TABLE %1$s RENAME TO %2$s', dyn_table, dyn_table_bk); + + -- create the new table; note that there are no constraints, so the + -- inserts go fast, but we will add them later + EXECUTE format('CREATE TABLE %1$s (sample_id VARCHAR NOT NULL, sample_values JSONB)', dyn_table); + + -- inserting our helper row qiita_sample_column_names, which is going to keep all our columns; this is much easier than trying to keep all rows with the same values + EXECUTE 'INSERT INTO ' || dyn_table || ' (sample_id, sample_values) VALUES (''qiita_sample_column_names'', (''{"columns":'' || (SELECT json_agg(column_name::text) FROM information_schema.columns WHERE table_name=''' || dyn_table_bk || ''' AND table_schema=''qiita'' AND column_name != ''sample_id'')::text || ''}'')::json);'; + + -- inserting the table values row by row; this might take a long time + FOR sid IN + EXECUTE 'SELECT sample_id FROM qiita.' || dyn_table_bk + LOOP + EXECUTE 'INSERT INTO ' || dyn_table || ' (sample_id, sample_values) VALUES (''' || sid || ''', (SELECT row_to_json(t)::jsonb - ''sample_id'' FROM (SELECT * FROM qiita.' || dyn_table_bk || ' WHERE sample_id = ''' || sid || ''') t));'; + END LOOP; + + -- adding the primary key (which also creates its index) + EXECUTE 'ALTER TABLE ' || dyn_table || ' ADD CONSTRAINT pk_jsonb_' || dyn_t || ' PRIMARY KEY ( sample_id );'; + END LOOP; +END $do$; diff --git a/qiita_db/support_files/patches/70.sql b/qiita_db/support_files/patches/70.sql new file mode 100644 index 000000000..30542712f --- /dev/null +++ b/qiita_db/support_files/patches/70.sql @@ -0,0 +1,50 @@ +-- November 21, 2018 +-- moving sample and prep info files to jsonb + +-- 3/4 This is the continuation of the patching that started in 68.sql; let's +-- move the data for the prep templates, but now for prep ids >= 3500 +DO $do$ +DECLARE + dyn_t varchar; + dyn_table varchar; + dyn_table_bk varchar; + sid varchar; +BEGIN + FOR dyn_t IN + SELECT DISTINCT table_name + FROM information_schema.columns + WHERE SUBSTR(table_name, 1, 5) = 'prep_' + AND table_schema = 'qiita' + AND table_name NOT IN ('prep_template', + 'prep_template_preprocessed_data', + 'prep_template_filepath', + 'prep_columns', + 'prep_template_processing_job', + 'prep_template_sample') + AND table_name NOT LIKE '%_bk' + AND SUBSTR(table_name, 6)::INT >= 3500 + LOOP + dyn_table := 'qiita.' || dyn_t; + dyn_table_bk := dyn_t || '_bk'; + + -- rename the tables so we can move the data later + EXECUTE format('ALTER TABLE %1$s RENAME TO %2$s', dyn_table, dyn_table_bk); + + -- create the new table; note that there are no constraints, so the + -- inserts go fast, but we will add them later + EXECUTE format('CREATE TABLE %1$s (sample_id VARCHAR NOT NULL, sample_values JSONB)', dyn_table); + + -- inserting our helper row qiita_sample_column_names, which is going to keep all our columns; this is much easier than trying to keep all rows with the same values + EXECUTE 'INSERT INTO ' || dyn_table || ' (sample_id, sample_values) VALUES (''qiita_sample_column_names'', (''{"columns":'' || (SELECT json_agg(column_name::text) FROM information_schema.columns WHERE table_name=''' || dyn_table_bk || ''' AND table_schema=''qiita'' AND column_name != ''sample_id'')::text || ''}'')::json);'; + + -- inserting the table values row by row; this might take a long time + FOR sid IN + EXECUTE 'SELECT sample_id FROM qiita.' || dyn_table_bk + LOOP + EXECUTE 'INSERT INTO ' || dyn_table || ' (sample_id, sample_values) VALUES (''' || sid || ''', (SELECT row_to_json(t)::jsonb - ''sample_id'' FROM (SELECT * FROM qiita.' || dyn_table_bk || ' WHERE sample_id = ''' || sid || ''') t));'; + END LOOP; + + -- adding the primary key (which also creates its index) + EXECUTE 'ALTER TABLE ' || dyn_table || ' ADD CONSTRAINT pk_jsonb_' || dyn_t || ' PRIMARY KEY ( sample_id );'; + END LOOP; +END $do$; diff --git a/qiita_db/support_files/patches/71.sql b/qiita_db/support_files/patches/71.sql new file mode 100644 index 000000000..5e9245a2c --- /dev/null +++ b/qiita_db/support_files/patches/71.sql @@ -0,0 +1,19 @@ +-- November 21, 2018 +-- moving sample and prep info files to jsonb + +-- 4/4 This is the continuation of the patching that started in 68.sql; let's +-- remove all the temp (_bk) tables we created + +-- Dropping all the _bk tables +DO $do$ +DECLARE + dyn_table varchar; +BEGIN + FOR dyn_table IN + SELECT DISTINCT table_name + FROM information_schema.columns + WHERE table_name LIKE '%_bk' + LOOP + EXECUTE 'DROP TABLE qiita.' 
|| dyn_table; + END LOOP; +END $do$; diff --git a/qiita_db/support_files/populate_test_db.sql b/qiita_db/support_files/populate_test_db.sql index b99da271a..019d2162c 100644 --- a/qiita_db/support_files/populate_test_db.sql +++ b/qiita_db/support_files/populate_test_db.sql @@ -35,12 +35,12 @@ INSERT INTO qiita.study (email, emp_person_id, first_contact, mixs_compliant, most_recent_contact, number_samples_collected, number_samples_promised, principal_investigator_id, reprocess, spatial_series, study_title, study_alias, study_description, - study_abstract, vamps_id, ebi_study_accession, ebi_submission_status) VALUES + study_abstract, vamps_id, ebi_study_accession) VALUES ('test@foo.bar', 2, '2014-05-19 16:10', NULL, 1, 1, TRUE, TRUE, '2014-05-19 16:11', 27, 27, 3, FALSE, FALSE, 'Identification of the Microbiomes for Cannabis Soils', 'Cannabis Soils', 'Analysis of the Cannabis Plant Microbiome', 'This is a preliminary study to examine the microbiota associated with the Cannabis plant. Soils samples from the bulk soil, soil associated with the roots, and the rhizosphere were extracted and the DNA sequenced. Roots from three independent plants of different strains were examined. These roots were obtained November 11, 2011 from plants that had been harvested in the summer. Future studies will attempt to analyze the soils and rhizospheres from the same location at different time points in the plant lifecycle.', - NULL, 'EBI123456-BB', 'submitted'); + NULL, 'EBI123456-BB'); -- Add portal to the study INSERT INTO qiita.study_portal (study_id, portal_type_id) VALUES (1, 1); @@ -52,8 +52,14 @@ INSERT INTO qiita.study_environmental_package (study_id, environmental_package_n INSERT INTO qiita.study_users (study_id, email) VALUES (1, 'shared@foo.bar'); -- Insert PMIDs for study -INSERT INTO qiita.publication (doi, pubmed_id) VALUES ('10.100/123456', '123456'), ('10.100/7891011', '7891011'); -INSERT INTO qiita.study_publication (study_id, publication_doi) VALUES (1, '10.100/123456'), (1, '10.100/7891011'); +INSERT INTO qiita.publication (doi, pubmed_id) VALUES + ('10.100/123456', '123456'), + ('10.100/7891011', '7891011'); +INSERT INTO qiita.study_publication (study_id, publication, is_doi) VALUES + (1, '10.100/123456', true), + (1, '123456', false), + (1, '10.100/7891011', true), + (1, '7891011', false); -- Insert an investigation INSERT INTO qiita.investigation (investigation_name, investigation_description, contact_person_id) VALUES @@ -130,37 +136,38 @@ CREATE TABLE qiita.sample_1 ( -- Populates the sample_1 dynamic table INSERT INTO qiita.sample_1 (sample_id, season_environment, assigned_from_geo, texture, taxon_id, depth, host_taxid, common_name, water_content_soil, elevation, temp, tot_nitro, samp_salinity, altitude, env_biome, country, ph, anonymized_name, tot_org_carb, description_duplicate, env_feature, physical_specimen_location, physical_specimen_remaining, dna_extracted, sample_type, collection_timestamp, host_subject_id, description, latitude, longitude, scientific_name) VALUES - ('1.SKM7.640188', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM7', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B6', 'Cannabis Soil Microbiome', 60.1102854322, 74.7123248382, '1118232'), - ('1.SKD9.640182', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', 
'1118232', '0.15', '3483', 'root metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKD9', '4.32', 'Diesel Root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D3', 'Cannabis Soil Microbiome', 23.1218032799, 42.838497795, '1118232'), - ('1.SKM8.640201', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM8', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D8', 'Cannabis Soil Microbiome', 3.21190859967, 26.8138925876, '1118232'), - ('1.SKB8.640193', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB8', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M7', 'Cannabis Soil Microbiome', 74.0894932572, 65.3283470202, '1118232'), - ('1.SKD2.640178', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD2', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B5', 'Cannabis Soil Microbiome', 53.5050692395, 31.6056761814, '1118232'), - ('1.SKM3.640197', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM3', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B7', 'Cannabis Soil Microbiome', 'Not applicable', 31.2003474585, '1118232'), - ('1.SKM4.640180', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM4', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D2', 'Cannabis Soil Microbiome', 'Not applicable', 'Not applicable', '1118232'), - ('1.SKB9.640200', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKB9', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B3', 'Cannabis Soil Microbiome', 12.6245524972, 96.0693176066, '1118232'), - ('1.SKB4.640189', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB4', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D7', 'Cannabis Soil Microbiome', 
43.9614715197, 82.8516734159, '1118232'), - ('1.SKB5.640181', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB5', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M4', 'Cannabis Soil Microbiome', 10.6655599093, 70.784770579, '1118232'), - ('1.SKB6.640176', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB6', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D5', 'Cannabis Soil Microbiome', 78.3634273709, 74.423907894, '1118232'), - ('1.SKM2.640199', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM2', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D4', 'Cannabis Soil Microbiome', 82.8302905615, 86.3615778099, '1118232'), - ('1.SKM5.640177', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM5', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M3', 'Cannabis Soil Microbiome', 44.9725384282, 66.1920014699, '1118232'), - ('1.SKB1.640202', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '410658', '0.15', '3483', 'soil metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB1', '5', 'Burmese bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M2', 'Cannabis Soil Microbiome', 4.59216095574, 63.5115213108, '1118232'), - ('1.SKD8.640184', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '1118232', '0.15', '3483', 'root metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD8', '4.32', 'Diesel Root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D9', 'Cannabis Soil Microbiome', 57.571893782, 32.5563076447, '1118232'), - ('1.SKD4.640185', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD4', '4.32', 'Diesel Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M9', 'Cannabis Soil Microbiome', 40.8623799474, 6.66444220187, '1118232'), - ('1.SKB3.640195', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '410658', '0.15', '3483', 'soil metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB3', '5', 'Burmese bulk', 'ENVO:plant-associated 
habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M6', 'Cannabis Soil Microbiome', 95.2060749748, 27.3592668624, '1118232'), - ('1.SKM1.640183', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM1', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D1', 'Cannabis Soil Microbiome', 38.2627021402, 3.48274264219, '1118232'), - ('1.SKB7.640196', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB7', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M8', 'Cannabis Soil Microbiome', 13.089194595, 92.5274472082, '1118232'), - ('1.SKD3.640198', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD3', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B1', 'Cannabis Soil Microbiome', 84.0030227585, 66.8954849864, '1118232'), - ('1.SKD7.640191', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '1118232', '0.15', '3483', 'root metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD7', '4.32', 'Diesel Root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:D6', 'Cannabis Soil Microbiome', 68.51099627, 2.35063674718, '1118232'), - ('1.SKD6.640190', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD6', '4.32', 'Diesel Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B9', 'Cannabis Soil Microbiome', 29.1499460692, 82.1270418227, '1118232'), - ('1.SKB2.640194', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '410658', '0.15', '3483', 'soil metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB2', '5', 'Burmese bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B4', 'Cannabis Soil Microbiome', 35.2374368957, 68.5041623253, '1118232'), - ('1.SKM9.640192', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM9', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B8', 'Cannabis Soil Microbiome', 12.7065957714, 84.9722975792, '1118232'), - ('1.SKM6.640187', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 
'GAZ:United States of America', '6.82', 'SKM6', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:B2', 'Cannabis Soil Microbiome', 0.291867635913, 68.5945325743, '1118232'), - ('1.SKD5.640186', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD5', '4.32', 'Diesel Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M1', 'Cannabis Soil Microbiome', 85.4121476399, 15.6526750776, '1118232'), - ('1.SKD1.640179', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD1', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '11/11/11 13:00:00', '1001:M5', 'Cannabis Soil Microbiome', 68.0991287718, 34.8360987059, '1118232'); + ('1.SKM7.640188', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM7', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B6', 'Cannabis Soil Microbiome', 60.1102854322, 74.7123248382, '1118232'), + ('1.SKD9.640182', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '1118232', '0.15', '3483', 'root metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKD9', '4.32', 'Diesel Root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D3', 'Cannabis Soil Microbiome', 23.1218032799, 42.838497795, '1118232'), + ('1.SKM8.640201', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM8', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D8', 'Cannabis Soil Microbiome', 3.21190859967, 26.8138925876, '1118232'), + ('1.SKB8.640193', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB8', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M7', 'Cannabis Soil Microbiome', 74.0894932572, 65.3283470202, '1118232'), + ('1.SKD2.640178', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD2', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B5', 'Cannabis Soil Microbiome', 53.5050692395, 31.6056761814, '1118232'), + ('1.SKM3.640197', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', 
'114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM3', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B7', 'Cannabis Soil Microbiome', 'Not applicable', 31.2003474585, '1118232'), + ('1.SKM4.640180', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM4', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D2', 'Cannabis Soil Microbiome', 'Not applicable', 'Not applicable', '1118232'), + ('1.SKB9.640200', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKB9', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B3', 'Cannabis Soil Microbiome', 12.6245524972, 96.0693176066, '1118232'), + ('1.SKB4.640189', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB4', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D7', 'Cannabis Soil Microbiome', 43.9614715197, 82.8516734159, '1118232'), + ('1.SKB5.640181', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB5', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M4', 'Cannabis Soil Microbiome', 10.6655599093, 70.784770579, '1118232'), + ('1.SKB6.640176', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB6', '5', 'Burmese Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D5', 'Cannabis Soil Microbiome', 78.3634273709, 74.423907894, '1118232'), + ('1.SKM2.640199', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM2', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D4', 'Cannabis Soil Microbiome', 82.8302905615, 86.3615778099, '1118232'), + ('1.SKM5.640177', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM5', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M3', 'Cannabis Soil Microbiome', 44.9725384282, 66.1920014699, 
'1118232'), + ('1.SKB1.640202', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '410658', '0.15', '3483', 'soil metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB1', '5', 'Burmese bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M2', 'Cannabis Soil Microbiome', 4.59216095574, 63.5115213108, '1118232'), + ('1.SKD8.640184', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '1118232', '0.15', '3483', 'root metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD8', '4.32', 'Diesel Root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D9', 'Cannabis Soil Microbiome', 57.571893782, 32.5563076447, '1118232'), + ('1.SKD4.640185', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD4', '4.32', 'Diesel Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M9', 'Cannabis Soil Microbiome', 40.8623799474, 6.66444220187, '1118232'), + ('1.SKB3.640195', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '410658', '0.15', '3483', 'soil metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB3', '5', 'Burmese bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M6', 'Cannabis Soil Microbiome', 95.2060749748, 27.3592668624, '1118232'), + ('1.SKM1.640183', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '410658', '0.15', '3483', 'soil metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM1', '3.31', 'Bucu bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:D1', 'Cannabis Soil Microbiome', 38.2627021402, 3.48274264219, '1118232'), + ('1.SKB7.640196', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '1118232', '0.15', '3483', 'root metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB7', '5', 'Burmese root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M8', 'Cannabis Soil Microbiome', 13.089194595, 92.5274472082, '1118232'), + ('1.SKD3.640198', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD3', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B1', 'Cannabis Soil Microbiome', 84.0030227585, 66.8954849864, '1118232'), + ('1.SKD7.640191', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '1118232', '0.15', '3483', 'root metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD7', '4.32', 'Diesel Root', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 
'ENVO:soil', '2011-11-11 13:00:00', '1001:D6', 'Cannabis Soil Microbiome', 68.51099627, 2.35063674718, '1118232'), + ('1.SKD6.640190', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD6', '4.32', 'Diesel Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B9', 'Cannabis Soil Microbiome', 29.1499460692, 82.1270418227, '1118232'), + ('1.SKB2.640194', 'winter', 'n', '64.6 sand, 17.6 silt, 17.8 clay', '410658', '0.15', '3483', 'soil metagenome', '0.164', '114', '15', '1.41', '7.15', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.94', 'SKB2', '5', 'Burmese bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B4', 'Cannabis Soil Microbiome', 35.2374368957, 68.5041623253, '1118232'), + ('1.SKM9.640192', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '1118232', '0.15', '3483', 'root metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM9', '3.31', 'Bucu Roots', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B8', 'Cannabis Soil Microbiome', 12.7065957714, 84.9722975792, '1118232'), + ('1.SKM6.640187', 'winter', 'n', '63.1 sand, 17.7 silt, 19.2 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.101', '114', '15', '1.3', '7.44', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.82', 'SKM6', '3.31', 'Bucu Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:B2', 'Cannabis Soil Microbiome', 0.291867635913, 68.5945325743, '1118232'), + ('1.SKD5.640186', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '939928', '0.15', '3483', 'rhizosphere metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD5', '4.32', 'Diesel Rhizo', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M1', 'Cannabis Soil Microbiome', 85.4121476399, 15.6526750776, '1118232'), + ('1.SKD1.640179', 'winter', 'n', '66 sand, 16.3 silt, 17.7 clay', '410658', '0.15', '3483', 'soil metagenome', '0.178', '114', '15', '1.51', '7.1', '0', 'ENVO:Temperate grasslands, savannas, and shrubland biome', 'GAZ:United States of America', '6.8', 'SKD1', '4.32', 'Diesel bulk', 'ENVO:plant-associated habitat', 'ANL', TRUE, TRUE, 'ENVO:soil', '2011-11-11 13:00:00', '1001:M5', 'Cannabis Soil Microbiome', 68.0991287718, 34.8360987059, '1118232'); -- Create a new prep template for the added raw data -INSERT INTO qiita.prep_template (data_type_id, preprocessing_status, investigation_type) VALUES (2, 'success', 'Metagenomics'); -INSERT INTO qiita.prep_template (data_type_id, preprocessing_status, investigation_type) VALUES (2, 'success', 'Metagenomics'); +INSERT INTO qiita.prep_template (data_type_id, preprocessing_status, investigation_type, artifact_id, name) VALUES + (2, 'success', 'Metagenomics', NULL, 'Prep information 1'), + (2, 'success', 'Metagenomics', NULL, 'Prep information 2'); -- Add the common prep info for study 1 INSERT INTO qiita.prep_template_sample (prep_template_id, sample_id, 
ebi_experiment_accession) VALUES @@ -344,40 +351,25 @@ INSERT INTO qiita.study_prep_template (study_id, prep_template_id) VALUES (1, 1) INSERT INTO qiita.study_prep_template (study_id, prep_template_id) VALUES (1, 2); -- Insert some artifacts --- 1 (Raw fastq) ---> 2 (demultiplexed) ---> 4 (otu table) --- \-> 3 (demultiplexed) -INSERT INTO qiita.artifact (name, generated_timestamp, command_id, command_parameters, - visibility_id, artifact_type_id, data_type_id) - VALUES ('Raw data 1', 'Mon Oct 1 09:30:27 2012', NULL, NULL, 3, 3, 2), - ('Demultiplexed 1', 'Mon Oct 1 10:30:27 2012', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}'::json, - 3, 6, 2), - ('Demultiplexed 2', 'Mon Oct 1 11:30:27 2012', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}'::json, - 3, 6, 2), - ('BIOM', 'Tue Oct 2 17:30:00 2012', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}'::json, - 3, 7, 2), - ('BIOM', 'Tue Oct 2 17:30:00 2012', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}'::json, - 3, 7, 2), - ('BIOM', 'Tue Oct 2 17:30:00 2012', 3, '{"reference":2,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}'::json, - 3, 7, 1), - ('BIOM', 'Tue Oct 2 17:30:00 2012', NULL, NULL, - 3, 7, 1); +INSERT INTO qiita.artifact (generated_timestamp, command_id, command_parameters, visibility_id, artifact_type_id, data_type_id, submitted_to_vamps, name) VALUES + ('2012-10-01 09:30:27', NULL, NULL, 3, 3, 2, false, 'Raw data 1'), + ('2012-10-01 10:30:27', 1, '{"max_barcode_errors": "1.5", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "sequence_max_n": "0", "rev_comp_mapping_barcodes": "False", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, 6, 2, false, 'Demultiplexed 1'), + ('2012-10-01 11:30:27', 1, '{"max_barcode_errors": "1.5", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "sequence_max_n": "0", "rev_comp_mapping_barcodes": "True", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, 6, 2, false, 'Demultiplexed 2'), + ('2012-10-02 17:30:00', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, 7, 2, false, 'BIOM'), + ('2012-10-02 17:30:00', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, 7, 2, false, 'BIOM'), + ('2012-10-02 17:30:00', 3, '{"reference": "2", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, 7, 1, false, 'BIOM'), + ('2012-10-02 17:30:00', 
NULL, NULL, 3, 7, 1, false, 'BIOM'), + ('2018-12-03 14:06:45.117389', NULL, NULL, 4, 7, 2, false, 'noname'), + ('2018-12-03 14:06:45.117389', 12, '{"biom_table": "8", "depth": "9000", "subsample_multinomial": "False"}', 4, 7, 2, false, 'noname'); + +-- link new artifacts with prep info files +UPDATE qiita.prep_template SET artifact_id = 1 WHERE prep_template_id = 1; +UPDATE qiita.prep_template SET artifact_id = 7 WHERE prep_template_id = 2; -- Link the child artifacts with their parents artifacts INSERT INTO qiita.parent_artifact (parent_id, artifact_id) VALUES (1, 2), (1, 3), - (2, 4), (2, 5), (2, 6); - --- Insert the jobs that processed the previous artifacts -INSERT INTO qiita.processing_job (processing_job_id, email, command_id, command_parameters, processing_job_status_id) - VALUES ('6d368e16-2242-4cf8-87b4-a5dc40bb890b', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}'::json, 3), - ('4c7115e8-4c8e-424c-bf25-96c292ca1931', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}'::json, 3), - ('3c9991ab-6c14-4368-a48c-841e8837a79c', 'test@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}'::json, 3); - --- Relate the above jobs with the artifacts -INSERT INTO qiita.artifact_processing_job (artifact_id, processing_job_id) - VALUES (1, '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), - (1, '4c7115e8-4c8e-424c-bf25-96c292ca1931'), - (2, '3c9991ab-6c14-4368-a48c-841e8837a79c'); + (2, 4), (2, 5), (2, 6), (8, 9); -- Insert filepaths for the artifacts and reference INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id, data_directory_id) @@ -394,12 +386,6 @@ INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algor ('Silva_97_otu_taxonomy.txt', 11, '852952723', 1, 6), ('1_study_1001_closed_reference_otu_table_Silva.biom', 7, '852952723', 1, 4); --- Link the artifacts with the filepaths -INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) - VALUES (1, 1), (1, 2), - (2, 3), (2, 4), (2, 5), - (4, 9), (5, 9), (6, 12); - -- Link the artifact with the prep template UPDATE qiita.prep_template SET artifact_id = 1 WHERE prep_template_id = 1; UPDATE qiita.prep_template SET artifact_id = 7 WHERE prep_template_id = 2; @@ -448,30 +434,34 @@ INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algor ('1_job_result.txt', 9, '852952723', 1, 2), ('2_test_folder', 8, '852952723', 1, 2); --- Insert jobs -INSERT INTO qiita.job (data_type_id, job_status_id, command_id, options, input_file_reference_id, input_file_software_command_id) VALUES -(2, 1, 1, '{"--otu_table_fp":1}', 1, 3), -(2, 3, 2, '{"--mapping_fp":1,"--otu_table_fp":1}', 2, 3), -(2, 1, 2, '{"--mapping_fp":1,"--otu_table_fp":1}', 1, 3); - --- Add job results -INSERT INTO qiita.job_results_filepath (job_id, filepath_id) VALUES (1, 13), (2, 14); - -- Insert Analysis -INSERT INTO qiita.analysis (email, name, description, analysis_status_id, pmid) VALUES ('test@foo.bar', 'SomeAnalysis', 'A test analysis', 1, 
'121112'), ('admin@foo.bar', 'SomeSecondAnalysis', 'Another test analysis', 1, '22221112'); -INSERT INTO qiita.analysis_portal (analysis_id, portal_type_id) VALUES (1, 1), (2, 1); --- Insert Analysis Workflow -INSERT INTO qiita.analysis_workflow (analysis_id, step) VALUES (1, 3), (2, 3); - --- Attach jobs to analysis -INSERT INTO qiita.analysis_job (analysis_id, job_id) VALUES (1, 1), (1, 2), (2, 3); +INSERT INTO qiita.analysis (email, name, description, pmid, "timestamp", dflt, logging_id) VALUES + ('test@foo.bar', 'SomeAnalysis', 'A test analysis', '121112', '2018-12-03 13:52:42.751331-07', false, NULL), + ('admin@foo.bar', 'SomeSecondAnalysis', 'Another test analysis', '22221112', '2018-12-03 13:52:42.751331-07', false, NULL), + ('test@foo.bar', 'test@foo.bar-dflt-1', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('admin@foo.bar', 'admin@foo.bar-dflt-1', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('shared@foo.bar', 'shared@foo.bar-dflt-1', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('demo@microbio.me', 'demo@microbio.me-dflt-1', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('test@foo.bar', 'test@foo.bar-dflt-2', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('admin@foo.bar', 'admin@foo.bar-dflt-2', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('shared@foo.bar', 'shared@foo.bar-dflt-2', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL), + ('demo@microbio.me', 'demo@microbio.me-dflt-2', 'dflt', NULL, '2018-12-03 13:52:42.751331-07', true, NULL); +INSERT INTO qiita.analysis_portal (analysis_id, portal_type_id) VALUES + (1, 1), (2, 1), (3, 1), (4, 1),(5, 1), (6, 1), (7, 2), (8, 2), (9, 2), (10, 2); + +INSERT INTO qiita.analysis_artifact (analysis_id, artifact_id) VALUES + (1, 8), + (1, 9); -- Insert filepath for analysis biom files INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id, data_directory_id) VALUES -('1_analysis_18S.biom', 7, '852952723', 1, 1), ('1_analysis_mapping.txt', 9, '852952723', 1, 1); + ('1_analysis_18S.biom', 7, '852952723', 1, 1), + ('1_analysis_mapping.txt', 9, '852952723', 1, 1); -- Attach filepath to analysis -INSERT INTO qiita.analysis_filepath (analysis_id, filepath_id, data_type_id) VALUES (1, 15, 2), (1, 16, NULL); +INSERT INTO qiita.analysis_filepath (analysis_id, filepath_id, data_type_id) VALUES + (1, 15, 2), + (1, 16, NULL); -- Attach samples to analysis INSERT INTO qiita.analysis_sample (analysis_id, artifact_id, sample_id) VALUES @@ -480,9 +470,10 @@ INSERT INTO qiita.analysis_sample (analysis_id, artifact_id, sample_id) VALUES (1, 5, '1.SKB8.640193'), (1, 5, '1.SKD8.640184'), (1, 5, '1.SKB7.640196'), (1, 5, '1.SKM9.640192'), (1, 5, '1.SKM4.640180'), (2, 5, '1.SKB8.640193'), (2, 5, '1.SKD8.640184'), (2, 5, '1.SKB7.640196'), (2, 5, '1.SKM3.640197'), (1, 6, '1.SKB8.640193'), (1, 6, '1.SKD8.640184'), (1, 6, '1.SKB7.640196'), (1, 6, '1.SKM9.640192'), (1, 6, '1.SKM4.640180'), -(2, 6, '1.SKB8.640193'), (2, 6, '1.SKD8.640184'), (2, 6, '1.SKB7.640196'), (2, 6, '1.SKM3.640197'); +(2, 6, '1.SKB8.640193'), (2, 6, '1.SKD8.640184'), (2, 6, '1.SKB7.640196'), (2, 6, '1.SKM3.640197'), +(3, 4, '1.SKD8.640184'), (3, 4, '1.SKB7.640196'), (3, 4, '1.SKM9.640192'), (3, 4, '1.SKM4.640180'); ---Share analysis with shared user +-- Share analysis with shared user INSERT INTO qiita.analysis_users (analysis_id, email) VALUES (1, 'shared@foo.bar'); -- Add an ontology @@ -508,24 +499,6 @@ INSERT INTO qiita.term (term_id, ontology_id, 
term, identifier, definition, name INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id, data_directory_id) VALUES ('1_19700101-000000.txt', 14, '852952723', 1, 9); INSERT INTO qiita.sample_template_filepath VALUES (1, 17); ---add collection to the database -INSERT INTO qiita.collection (email, name, description) VALUES ('test@foo.bar', 'TEST_COLLECTION', 'collection for testing purposes'); - ---associate analyses and jobs with collection -INSERT INTO qiita.collection_analysis (collection_id, analysis_id) VALUES (1, 1); -INSERT INTO qiita.collection_job (collection_id, job_id) VALUES (1, 1); - ---share collection with shared user -INSERT INTO qiita.collection_users (email, collection_id) VALUES ('shared@foo.bar', 1); - ---add default analysis for users -INSERT INTO qiita.analysis (email, name, description, dflt, analysis_status_id) VALUES ('test@foo.bar', 'test@foo.bar-dflt-1', 'dflt', true, 1), ('admin@foo.bar', 'admin@foo.bar-dflt-1', 'dflt', true, 1), ('shared@foo.bar', 'shared@foo.bar-dflt-1', 'dflt', true, 1), ('demo@microbio.me', 'demo@microbio.me-dflt-1', 'dflt', true, 1), ('test@foo.bar', 'test@foo.bar-dflt-2', 'dflt', true, 1), ('admin@foo.bar', 'admin@foo.bar-dflt-2', 'dflt', true, 2), ('shared@foo.bar', 'shared@foo.bar-dflt-2', 'dflt', true, 2), ('demo@microbio.me', 'demo@microbio.me-dflt-2', 'dflt', true, 2); -INSERT INTO qiita.analysis_portal (analysis_id, portal_type_id) VALUES (3, 1), (4, 1), (5, 1), (6, 1), (7, 2), (8, 2), (9, 2), (10, 2); - --- Attach samples to analysis -INSERT INTO qiita.analysis_sample (analysis_id, artifact_id, sample_id) - VALUES (3, 4, '1.SKD8.640184'), (3, 4, '1.SKB7.640196'), (3, 4, '1.SKM9.640192'), (3, 4, '1.SKM4.640180'); - -- Create the new prep_template_filepath INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id, data_directory_id) VALUES ('1_prep_1_19700101-000000.txt', 15, '3703494589', 1, 9); INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id, data_directory_id) VALUES ('1_prep_1_qiime_19700101-000000.txt', 16, '3703494589', 1, 9); @@ -535,6 +508,17 @@ INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algor INSERT INTO qiita.prep_template_filepath VALUES (1, 20), (1, 21); +-- Inserting the BIOM artifact filepath +INSERT INTO qiita.filepath (filepath, filepath_type_id, checksum, checksum_algorithm_id, data_directory_id) VALUES + ('biom_table.biom', 7, 3574395811, 1, 16); + +-- Link the artifacts with the filepaths +INSERT INTO qiita.artifact_filepath (artifact_id, filepath_id) + VALUES (1, 1), (1, 2), + (2, 3), (2, 4), (2, 5), + (4, 9), (5, 9), (6, 12), + (8, 22), (9, 15); + -- Create some test messages INSERT INTO qiita.message (message) VALUES ('message 1'), ('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque sed auctor ex, non placerat sapien. Vestibulum vestibulum massa ut sapien condimentum, cursus consequat diam sodales. Nulla aliquam arcu ut massa auctor, et vehicula mauris tempor. In lacinia viverra ante quis pellentesque. Nunc vel mi accumsan, porttitor eros ut, pharetra elit. Nulla ac nisi quis dui egestas malesuada vitae ut mauris. Morbi blandit non nisl a finibus. In erat velit, congue at ipsum sit amet, venenatis bibendum sem. Curabitur vel odio sed est rutrum rutrum. Quisque efficitur ut purus in ultrices. 
Pellentesque eu auctor justo.'), ('message 3'); INSERT INTO qiita.message_user (message_id, email) VALUES (1, 'test@foo.bar'),(1, 'shared@foo.bar'),(2, 'test@foo.bar'),(3, 'test@foo.bar'); @@ -545,20 +529,44 @@ INSERT INTO qiita.logging (time, severity_id, msg, information) ('Sun Nov 22 21:29:30 2015', 2, 'Error message', '{}'); -- Create some processing jobs -INSERT INTO qiita.processing_job - (processing_job_id, email, command_id, command_parameters, - processing_job_status_id, logging_id, heartbeat, step) - VALUES ('063e553b-327c-4818-ab4a-adfe58e49860', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}'::json, 1, NULL, NULL, NULL), - ('bcc7ebcd-39c1-43e4-af2d-822e3589f14d', 'test@foo.bar', 2, '{"min_seq_len":100,"max_seq_len":1000,"trim_seq_length":false,"min_qual_score":25,"max_ambig":6,"max_homopolymer":6,"max_primer_mismatch":0,"barcode_type":"golay_12","max_barcode_errors":1.5,"disable_bc_correction":false,"qual_score_window":0,"disable_primers":false,"reverse_primers":"disable","reverse_primer_mismatches":0,"truncate_ambi_bases":false,"input_data":1}'::json, 2, NULL, 'Sun Nov 22 21:00:00 2015', 'demultiplexing'), - ('b72369f9-a886-4193-8d3d-f7b504168e75', 'shared@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}'::json, 3, NULL, 'Sun Nov 22 21:15:00 2015', NULL), - ('d19f76ee-274e-4c1b-b3a2-a12d73507c55', 'shared@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}'::json, 4, 1, 'Sun Nov 22 21:30:00 2015', 'generating demux file'), - ('ac653cb5-76a6-4a45-929e-eb9b2dee6b63', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1}'::json, 5, NULL, NULL, NULL); - -INSERT INTO qiita.artifact_processing_job (artifact_id, processing_job_id) - VALUES (1, '063e553b-327c-4818-ab4a-adfe58e49860'), - (1, 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d'), - (1, 'b72369f9-a886-4193-8d3d-f7b504168e75'), - (2, 'd19f76ee-274e-4c1b-b3a2-a12d73507c55'); +INSERT INTO qiita.processing_job (processing_job_id, email, command_id, command_parameters, processing_job_status_id, logging_id, heartbeat, step, pending, hidden) VALUES + ('6d368e16-2242-4cf8-87b4-a5dc40bb890b', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, NULL, NULL, NULL, false), + ('4c7115e8-4c8e-424c-bf25-96c292ca1931', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, NULL, NULL, 
NULL, false), + ('3c9991ab-6c14-4368-a48c-841e8837a79c', 'test@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}', 3, NULL, NULL, NULL, NULL, false), + ('b72369f9-a886-4193-8d3d-f7b504168e75', 'shared@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":true,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 3, NULL, '2015-11-22 21:15:00', NULL, NULL, false), + ('46b76f74-e100-47aa-9bf2-c0208bcea52d', 'test@foo.bar', 1, '{"max_barcode_errors": "1.5", "sequence_max_n": "0", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "rev_comp_mapping_barcodes": "True", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, NULL, NULL, NULL, NULL, false), + ('80bf25f3-5f1d-4e10-9369-315e4244f6d5', 'test@foo.bar', 3, '{"reference": "2", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false), + ('9ba5ae7a-41e1-4202-b396-0259aeaac366', 'test@foo.bar', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false), + ('e5609746-a985-41a1-babf-6b3ebe9eb5a9', 'test@foo.bar', 3, '{"reference": "1", "similarity": "0.97", "sortmerna_e_value": "1", "sortmerna_max_pos": "10000", "input_data": "2", "threads": "1", "sortmerna_coverage": "0.97"}', 3, NULL, NULL, NULL, NULL, false), + ('6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f', 'test@foo.bar', 1, '{"max_barcode_errors": "1.5", "sequence_max_n": "0", "max_bad_run_length": "3", "phred_offset": "auto", "rev_comp": "False", "phred_quality_threshold": "3", "input_data": "1", "rev_comp_barcode": "False", "rev_comp_mapping_barcodes": "False", "min_per_read_length_fraction": "0.75", "barcode_type": "golay_12"}', 3, NULL, NULL, NULL, NULL, false), + ('8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0', 'test@foo.bar', 12, '{"biom_table": "8", "depth": "9000", "subsample_multinomial": "False"}', 3, NULL, NULL, NULL, NULL, false), + ('063e553b-327c-4818-ab4a-adfe58e49860', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1,"phred_offset":"auto"}', 1, NULL, NULL, NULL, NULL, true), + ('bcc7ebcd-39c1-43e4-af2d-822e3589f14d', 'test@foo.bar', 2, '{"min_seq_len":100,"max_seq_len":1000,"trim_seq_length":false,"min_qual_score":25,"max_ambig":6,"max_homopolymer":6,"max_primer_mismatch":0,"barcode_type":"golay_12","max_barcode_errors":1.5,"disable_bc_correction":false,"qual_score_window":0,"disable_primers":false,"reverse_primers":"disable","reverse_primer_mismatches":0,"truncate_ambi_bases":false,"input_data":1}', 2, NULL, '2015-11-22 21:00:00', 'demultiplexing', NULL, true), + ('d19f76ee-274e-4c1b-b3a2-a12d73507c55', 'shared@foo.bar', 3, '{"reference":1,"sortmerna_e_value":1,"sortmerna_max_pos":10000,"similarity":0.97,"sortmerna_coverage":0.97,"threads":1,"input_data":2}', 4, 1, '2015-11-22 21:30:00', 'generating demux file', NULL, true), 
+ ('ac653cb5-76a6-4a45-929e-eb9b2dee6b63', 'test@foo.bar', 1, '{"max_bad_run_length":3,"min_per_read_length_fraction":0.75,"sequence_max_n":0,"rev_comp_barcode":false,"rev_comp_mapping_barcodes":false,"rev_comp":false,"phred_quality_threshold":3,"barcode_type":"golay_12","max_barcode_errors":1.5,"input_data":1}', 5, NULL, NULL, NULL, NULL, true); + +INSERT INTO qiita.artifact_processing_job (artifact_id, processing_job_id) VALUES + (1, '6d368e16-2242-4cf8-87b4-a5dc40bb890b'), + (1, '4c7115e8-4c8e-424c-bf25-96c292ca1931'), + (2, '3c9991ab-6c14-4368-a48c-841e8837a79c'), + (1, '063e553b-327c-4818-ab4a-adfe58e49860'), + (1, 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d'), + (1, 'b72369f9-a886-4193-8d3d-f7b504168e75'), + (2, 'd19f76ee-274e-4c1b-b3a2-a12d73507c55'), + (1, '46b76f74-e100-47aa-9bf2-c0208bcea52d'), + (2, '80bf25f3-5f1d-4e10-9369-315e4244f6d5'), + (2, '9ba5ae7a-41e1-4202-b396-0259aeaac366'), + (2, 'e5609746-a985-41a1-babf-6b3ebe9eb5a9'), + (1, '6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f'), + (8, '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0'); + +INSERT INTO qiita.artifact_output_processing_job (artifact_id, processing_job_id, command_output_id) VALUES + (3, '46b76f74-e100-47aa-9bf2-c0208bcea52d', 1), + (6, '80bf25f3-5f1d-4e10-9369-315e4244f6d5', 3), + (5, '9ba5ae7a-41e1-4202-b396-0259aeaac366', 3), + (4, 'e5609746-a985-41a1-babf-6b3ebe9eb5a9', 3), + (2, '6ad4d590-4fa3-44d3-9a8f-ddbb472b1b5f', 1), + (9, '8a7a8461-e8a1-4b4e-a428-1bc2f4d3ebd0', 7); -- Add client ids and secrets diff --git a/qiita_db/support_files/qiita-db.dbs b/qiita_db/support_files/qiita-db.dbs index 267592b8b..3c1fb2112 100644 --- a/qiita_db/support_files/qiita-db.dbs +++ b/qiita_db/support_files/qiita-db.dbs @@ -1017,8 +1017,8 @@ Information on how raw data y was prepared (prep template)Linked by y being raw_data_id from raw data table. - - + + @@ -1230,9 +1230,8 @@
data for samples in study x (sample template)x is the study_id from study tableMAKE SURE sample_id IS FK TO sample_id IN required_sample_info TABLE - - - + + @@ -1307,9 +1306,9 @@ false - - - + + + @@ -1669,7 +1668,6 @@ Controlled Vocabulary]]> - @@ -1692,8 +1690,6 @@ Controlled Vocabulary]]> - - @@ -1737,6 +1733,9 @@ Controlled Vocabulary]]> + + + analysis tables diff --git a/qiita_db/support_files/qiita-db.html b/qiita_db/support_files/qiita-db.html index c82901095..a99347ebb 100644 --- a/qiita_db/support_files/qiita-db.html +++ b/qiita_db/support_files/qiita-db.html @@ -307,9 +307,9 @@ - + Group_template - + @@ -571,16 +571,11 @@ analysis_sample references artifact ( artifact_id ) artifact_id - Foreign Key fk_prep_template - prep_template_sample references prep_template ( prep_template_id ) - -prep_template_id + analysis_sample references artifact ( artifact_id )' style='fill:#a1a0a0;'>artifact_id Foreign Key fk_required_sample_info_study study_sample references study ( study_id ) -study_id Foreign Key fk_study_prep_template_study study_prep_template references study ( study_id ) @@ -861,7 +856,12 @@ study references timeseries_type ( timeseries_type_id ) timeseries_type_id + study references timeseries_type ( timeseries_type_id )' style='fill:#a1a0a0;'>timeseries_type_id + Foreign Key fk_prep_template + prep_template_sample references prep_template ( prep_template_id ) + +prep_template_id column_controlled_vocabulariesTable qiita.column_controlled_vocabularies @@ -1042,17 +1042,6 @@ <use id='nn' x='1502' y='1362' xlink:href='#nn'/><a xlink:href='#message.message_time'><text x='1518' y='1372'>message_time</text><title>message_time timestamp not null default current_timestamp expirationexpiration timestamp - - - -sample_xTable qiita.sample_x -data for samples in study x (sample template)x is the study_id from study tableMAKE SURE sample_id IS FK TO sample_id IN required_sample_info TABLE - Primary Key ( sample_id ) -sample_idsample_id varchar not null - descriptiondescription varchar not null - other_mapping_columnsother_mapping_columns varchar -Represents whatever other columns go with this study - @@ -1353,28 +1342,6 @@ data_typedata_type varchar not null Data type (16S, metabolome, etc) the job will use - - - -prep_yTable qiita.prep_y -Information on how raw data y was prepared (prep template)Linked by y being raw_data_id from raw data table. - Primary Key ( sample_id ) -sample_idsample_id varchar not null - datadata bigint -STUFFFFF - - - - -prep_template_sampleTable qiita.prep_template_sample - Primary Key ( prep_template_id, sample_id ) Index ( prep_template_id ) -prep_template_idprep_template_id bigint not null -The prep template identifier -References prep_template ( prep_template_id ) - Index ( sample_id ) Index ( sample_id ) Primary Key ( prep_template_id, sample_id ) -sample_idsample_id varchar not null - ebi_experiment_accessionebi_experiment_accession varchar - @@ -1911,8 +1878,8 @@ activeactive bool not null default 'True' is_analysisis_analysis bool not null default 'False' ignore_parent_commandignore_parent_command bool not null default false - post_processing_cmdpost_processing_cmd varchar -Store information on additional post-processing steps for merged BIOMs, if any. + post_processing_cmdpost_processing_cmd varchar +Store information on additional post-processing steps for merged BIOMs, if any. @@ -2042,6 +2009,38 @@ The name of the column that describes the specimen identifiers (such as what is written on the tubes). 
public_raw_downloadpublic_raw_download bool default 'false' + + + +prep_yTable qiita.prep_y +Information on how raw data y was prepared (prep template). Linked by y being raw_data_id from raw data table. + Primary Key ( sample_id ) +sample_idsample_id varchar not null + sample_valuessample_values varchar +Note that this is a jsonb, but DBSchema doesn't have this data type + + + + +prep_template_sampleTable qiita.prep_template_sample + Primary Key ( prep_template_id, sample_id ) Index ( prep_template_id ) +prep_template_idprep_template_id bigint not null +The prep template identifier +References prep_template ( prep_template_id ) + Index ( sample_id ) Index ( sample_id ) Primary Key ( prep_template_id, sample_id ) +sample_idsample_id varchar not null + ebi_experiment_accessionebi_experiment_accession varchar + + + + +sample_xTable qiita.sample_x +data for samples in study x (sample template). x is the study_id from study table. MAKE SURE sample_id IS FK TO sample_id IN required_sample_info TABLE + Primary Key ( sample_id ) +sample_idsample_id varchar not null + sample_valuessample_values varchar +Note that this is a jsonb, but DBSchema doesn't have this data type +

@@ -2634,39 +2633,6 @@
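The hunks above document the core of this migration in the generated schema docs: each dynamic sample_x/prep_y table now carries a single sample_values jsonb column, plus one special row (QIITA_COLUMN_NAME, i.e. 'qiita_sample_column_names') that records the template's known columns. A minimal sketch of reading that layout, assuming a plain psycopg2 connection to a test database and assuming the special row keeps its column listing under a 'columns' key (the key name is not shown in this diff):

# Sketch only, not Qiita's API: Qiita goes through its TRN transaction
# layer; plain psycopg2 keeps the example self-contained.
import psycopg2

QIITA_COLUMN_NAME = 'qiita_sample_column_names'

conn = psycopg2.connect(dbname='qiita_test')  # hypothetical DSN
with conn, conn.cursor() as cur:
    # the special row holds the template-wide column listing; the
    # 'columns' key is an assumption for illustration
    cur.execute("""SELECT sample_values->'columns'
                     FROM qiita.sample_1
                    WHERE sample_id = %s""", [QIITA_COLUMN_NAME])
    columns = cur.fetchone()[0]  # psycopg2 adapts jsonb to a Python object

    # every other row maps column name -> value for one sample, so a
    # single metadata value is pulled with the ->> (json-as-text) operator
    cur.execute("""SELECT sample_id, sample_values->>'latitude'
                     FROM qiita.sample_1
                    WHERE sample_id != %s""", [QIITA_COLUMN_NAME])
    lat_by_sample = dict(cur.fetchall())

The same exclusion of the special row appears below in the patched sql_ts query of qiita_db/util.py (WHERE sample_id != QIITA_COLUMN_NAME).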
-

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Table sample_x
data for samples in study x (sample template). x is the study_id from study table. MAKE SURE sample_id IS FK TO sample_id IN required_sample_info TABLE
*sample_id varchar
*description varchar
 other_mapping_columns varchar Represents whatever other columns go with this study
Indexes
Pk pk_study_x_y ON sample_id
-

@@ -3707,83 +3673,6 @@
-

- - - - - - - - - - - - - - - - - - - - - - - - -
Table prep_y
Information on how raw data y was prepared (prep template). Linked by y being raw_data_id from raw data table.
*sample_id varchar
 data bigint STUFFFFF
Indexes
Pk pk_prep_y ON sample_id
- -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Table prep_template_sample
*prep_template_id bigint The prep template identifier
*sample_id varchar
 ebi_experiment_accession varchar
Indexes
 idx_common_prep_info_0 ON sample_id
 idx_common_prep_info_0 ON sample_id
Pk idx_common_prep_info ON prep_template_id, sample_id
 idx_common_prep_info_1 ON prep_template_id
Foreign Keys
 fk_prep_template ( prep_template_id ) ref prep_template (prep_template_id)
-

@@ -5726,10 +5615,10 @@ - + - + @@ -6192,4 +6081,108 @@
*  post_processing_cmd varchar Store information on additional post-processing steps for merged BIOMs, if any. Store information on additional post-processing steps for merged BIOMs, if any.
Indexes
Pk pk_soft_command
+

+ + + + + + + + + + + + + + + + + + + + + + + + +
Table prep_y
Information on how raw data y was prepared (prep template). Linked by y being raw_data_id from raw data table.
*sample_id varchar
 sample_values varchar Note that this is a jsonb, but DBSchema doesn't have this data type
Indexes
Pk pk_prep_y ON sample_id
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Table prep_template_sample
*prep_template_id bigint The prep template identifier
*sample_id varchar
 ebi_experiment_accession varchar
Indexes
 idx_common_prep_info_0 ON sample_id
 idx_common_prep_info_0 ON sample_id
Pk idx_common_prep_info ON prep_template_id, sample_id
 idx_common_prep_info_1 ON prep_template_id
Foreign Keys
 fk_prep_template ( prep_template_id ) ref prep_template (prep_template_id)
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + +
Table sample_x
data for samples in study x (sample template). x is the study_id from study table. MAKE SURE sample_id IS FK TO sample_id IN required_sample_info TABLE
*sample_id varchar
 sample_values varchar Note that this is a jsonb, but DBSchema doesn't have this data type
Indexes
Pk pk_study_x_y ON sample_id
+ \ No newline at end of file diff --git a/qiita_db/test/test_analysis.py b/qiita_db/test/test_analysis.py index 86c411cca..2f1629b16 100644 --- a/qiita_db/test/test_analysis.py +++ b/qiita_db/test/test_analysis.py @@ -386,6 +386,15 @@ def test_build_mapping_file(self): obs, index='#SampleID') exp = qdb.metadata_template.util.load_template_to_dataframe( self.map_exp_fp, index='#SampleID') + + # assert_frame_equal assumes same order on the rows, thus sorting + # frames by index + obs.sort_index(inplace=True) + exp.sort_index(inplace=True) + # then sorting columns + obs = obs.reindex_axis(sorted(obs.columns), axis=1) + exp = exp.reindex_axis(sorted(exp.columns), axis=1) + assert_frame_equal(obs, exp) def test_build_mapping_file_duplicated_samples_no_merge(self): @@ -406,6 +415,10 @@ def test_build_mapping_file_duplicated_samples_no_merge(self): # frames by index obs.sort_index(inplace=True) exp.sort_index(inplace=True) + # then sorting columns + obs = obs.reindex_axis(sorted(obs.columns), axis=1) + exp = exp.reindex_axis(sorted(exp.columns), axis=1) + assert_frame_equal(obs, exp) def test_build_mapping_file_duplicated_samples_merge(self): @@ -420,6 +433,15 @@ def test_build_mapping_file_duplicated_samples_merge(self): mapping_fp, index='#SampleID') exp = qdb.metadata_template.util.load_template_to_dataframe( self.map_exp_fp, index='#SampleID') + + # assert_frame_equal assumes same order on the rows, thus sorting + # frames by index + obs.sort_index(inplace=True) + exp.sort_index(inplace=True) + # then sorting columns + obs = obs.reindex_axis(sorted(obs.columns), axis=1) + exp = exp.reindex_axis(sorted(exp.columns), axis=1) + assert_frame_equal(obs, exp) def test_build_biom_tables(self): diff --git a/qiita_db/test/test_setup.py b/qiita_db/test/test_setup.py index 30f87061a..c24be339e 100644 --- a/qiita_db/test/test_setup.py +++ b/qiita_db/test/test_setup.py @@ -33,7 +33,7 @@ def test_investigation_study(self): self.assertEqual(get_count("qiita.investigation_study"), 1) def test_filepath(self): - self.assertEqual(get_count("qiita.filepath"), 25) + self.assertEqual(get_count("qiita.filepath"), 27) def test_filepath_type(self): self.assertEqual(get_count("qiita.filepath_type"), 23) @@ -45,7 +45,7 @@ def test_required_sample_info(self): self.assertEqual(get_count("qiita.study_sample"), 27) def test_sample_1(self): - self.assertEqual(get_count("qiita.sample_1"), 27) + self.assertEqual(get_count("qiita.sample_1"), 28) def test_prep_template(self): self.assertEqual(get_count("qiita.prep_template"), 2) @@ -54,7 +54,7 @@ def test_prep_template_sample(self): self.assertEqual(get_count("qiita.prep_template_sample"), 54) def test_prep_1(self): - self.assertEqual(get_count("qiita.prep_1"), 27) + self.assertEqual(get_count("qiita.prep_1"), 28) def test_reference(self): self.assertEqual(get_count("qiita.reference"), 2) @@ -63,7 +63,7 @@ def test_analysis(self): self.assertEqual(get_count("qiita.analysis"), 10) def test_analysis_filepath(self): - self.assertEqual(get_count("qiita.analysis_filepath"), 1) + self.assertEqual(get_count("qiita.analysis_filepath"), 2) def test_analysis_sample(self): self.assertEqual(get_count("qiita.analysis_sample"), 31) diff --git a/qiita_db/test/test_util.py b/qiita_db/test/test_util.py index 62f7173a4..e2e5bdedf 100644 --- a/qiita_db/test/test_util.py +++ b/qiita_db/test/test_util.py @@ -1053,7 +1053,7 @@ def test_generate_study_list_without_artifacts(self): qdb.study.Study.delete(new_study.id) def test_get_artifacts_information(self): - # we are gonna test that it ignores 1 
and 2 cause they are not biom, + # we are going to test that it ignores 1 and 2 cause they are not biom, # 4 has all information and 7 and 8 don't obs = qdb.util.get_artifacts_information([1, 2, 4, 7, 8]) # not testing timestamp diff --git a/qiita_db/util.py b/qiita_db/util.py index 474f83804..f0f666003 100644 --- a/qiita_db/util.py +++ b/qiita_db/util.py @@ -1615,12 +1615,7 @@ def get_artifacts_information(artifact_ids, only_biom=True): parent_info.command_id, parent_info.name ORDER BY a.command_id, artifact_id), has_target_subfragment AS ( - SELECT main_query.*, CASE WHEN ( - SELECT true FROM information_schema.columns - WHERE table_name = 'prep_' || CAST( - prep_template_id AS TEXT) - AND column_name='target_subfragment') - THEN prep_template_id ELSE NULL END, prep_template_id + SELECT main_query.*, prep_template_id FROM main_query LEFT JOIN qiita.prep_template pt ON ( main_query.root_id = pt.artifact_id) @@ -1634,7 +1629,10 @@ def get_artifacts_information(artifact_ids, only_biom=True): WHERE parameter_type = 'artifact' GROUP BY command_id""" - sql_ts = """SELECT DISTINCT target_subfragment FROM qiita.prep_%s""" + QCN = qdb.metadata_template.base_metadata_template.QIITA_COLUMN_NAME + sql_ts = """SELECT DISTINCT sample_values->>'target_subfragment' + FROM qiita.prep_%s + WHERE sample_id != '{0}'""".format(QCN) with qdb.sql_connection.TRN: results = [] @@ -1650,15 +1648,19 @@ def get_artifacts_information(artifact_ids, only_biom=True): 'merging_scheme': cmd.merging_scheme, 'deprecated': cmd.software.deprecated} - # now let's get the actual artifacts - ts = {} + # Now let's get the actual artifacts. Note that ts is a cache + # (prep id : target subfragment) so we don't have to query + # multiple times the target subfragment for a prep info file. + # However, some artifacts (like analysis) do not have a prep info + # file; thus we can have a None prep id (key) + ts = {None: []} ps = {} algorithm_az = {'': ''} PT = qdb.metadata_template.prep_template.PrepTemplate qdb.sql_connection.TRN.add(sql, [tuple(artifact_ids)]) for row in qdb.sql_connection.TRN.execute_fetchindex(): aid, name, cid, cname, gt, aparams, dt, pid, pcid, pname, \ - pparams, filepaths, _, target, prep_template_id = row + pparams, filepaths, _, prep_template_id = row # cleaning up aparams # - [0] due to the array_agg @@ -1666,7 +1668,7 @@ def get_artifacts_information(artifact_ids, only_biom=True): if aparams is None: aparams = {} else: - # we are gonna remove any artifacts from the parameters + # we are going to remove any artifacts from the parameters for ti in commands[cid]['params']: del aparams[ti] @@ -1676,10 +1678,6 @@ def get_artifacts_information(artifact_ids, only_biom=True): else: filepaths = [fp for fp in filepaths if fp.endswith('biom')] - # - ignoring empty target - if target == [None]: - target = [] - # generating algorithm, by default is '' algorithm = '' # set to False because if there is no cid, it means that it @@ -1715,14 +1713,11 @@ def get_artifacts_information(artifact_ids, only_biom=True): algorithm_az[algorithm] = hashlib.md5( algorithm).hexdigest() - if target is None: - target = [] - else: - if target not in ts: - qdb.sql_connection.TRN.add(sql_ts, [target]) - ts[target] = \ - qdb.sql_connection.TRN.execute_fetchflatten() - target = ts[target] + if prep_template_id not in ts: + qdb.sql_connection.TRN.add(sql_ts, [prep_template_id]) + ts[prep_template_id] = \ + qdb.sql_connection.TRN.execute_fetchflatten() + target = ts[prep_template_id] prep_samples = 0 platform = 'not provided' diff --git 
a/qiita_pet/handlers/api_proxy/tests/test_prep_template.py b/qiita_pet/handlers/api_proxy/tests/test_prep_template.py index 1b5c3cf9b..68fd2b240 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_prep_template.py +++ b/qiita_pet/handlers/api_proxy/tests/test_prep_template.py @@ -72,8 +72,8 @@ def test_prep_template_ajax_get_req(self): 'message': '', 'name': "Prep information 1", 'files': ["uploaded_file.txt"], - 'download_prep_id': 23, - 'download_qiime_id': 24, + 'download_prep_id': 24, + 'download_qiime_id': 25, 'other_filepaths': ['1_prep_1_19700101-000000.txt', '1_prep_1_19700101-000000.txt'], 'num_samples': 27, @@ -183,7 +183,7 @@ def test_prep_template_filepaths_get_req(self): self.assertEqual(obs['message'], '') # [0] the fp_id is the first element, that should change fp_ids = [fp[0] for fp in obs['filepaths']] - self.assertItemsEqual(fp_ids, [18, 19, 20, 21, 23, 24]) + self.assertItemsEqual(fp_ids, [18, 19, 20, 21, 24, 25]) def test_prep_template_filepaths_get_req_no_access(self): obs = prep_template_filepaths_get_req(1, 'demo@microbio.me') diff --git a/qiita_pet/handlers/api_proxy/tests/test_sample_template.py b/qiita_pet/handlers/api_proxy/tests/test_sample_template.py index 69c1ecac0..5473ecae2 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_sample_template.py +++ b/qiita_pet/handlers/api_proxy/tests/test_sample_template.py @@ -264,7 +264,7 @@ def test_sample_template_filepaths_get_req(self): self.assertEqual(obs['message'], '') # [0] the fp_id is the first element, that should change fp_ids = [fp[0] for fp in obs['filepaths']] - self.assertItemsEqual(fp_ids, [17, 22]) + self.assertItemsEqual(fp_ids, [17, 23]) def test_sample_template_filepaths_get_req_no_access(self): obs = sample_template_filepaths_get_req(1, 'demo@microbio.me') diff --git a/qiita_pet/handlers/artifact_handlers/base_handlers.py b/qiita_pet/handlers/artifact_handlers/base_handlers.py index c4de7f317..3995037b2 100644 --- a/qiita_pet/handlers/artifact_handlers/base_handlers.py +++ b/qiita_pet/handlers/artifact_handlers/base_handlers.py @@ -413,7 +413,7 @@ def validate_absolute_path(self, root, absolute_path): """Overrides StaticFileHandler's method to include authentication""" user = self.current_user - # we are gonna inverse traverse the absolute_path and find the first + # we are going to inverse traverse the absolute_path and find the first # instance of an int, which is the artifact_id for s in reversed(absolute_path.split('/')): try: diff --git a/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py b/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py index ade9d7698..3abf10e60 100644 --- a/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py +++ b/qiita_pet/handlers/artifact_handlers/tests/test_base_handlers.py @@ -256,7 +256,7 @@ def test_artifact_summary_get_request(self): 'editable': True, 'buttons': '', 'processing_info': {}, - 'files': [(27, 'biom_table.biom (biom)')], + 'files': [(22, 'biom_table.biom (biom)')], 'is_from_analysis': True, 'summary': None, 'job': None, diff --git a/qiita_pet/handlers/study_handlers/tests/test_sample_template.py b/qiita_pet/handlers/study_handlers/tests/test_sample_template.py index e01ecc32f..018111e50 100644 --- a/qiita_pet/handlers/study_handlers/tests/test_sample_template.py +++ b/qiita_pet/handlers/study_handlers/tests/test_sample_template.py @@ -272,11 +272,12 @@ def test_sample_template_overview_handler_get_request(self): 'data_types': [], 'user_can_edit': True, 'job': None, - 'download_id': 22, + 'download_id': 23, 
'old_files': ['1_19700101-000000.txt'], 'num_samples': 27, 'num_columns': 30, - 'columns': ['season_environment', 'assigned_from_geo', + 'columns': sorted( + ['season_environment', 'assigned_from_geo', 'texture', 'taxon_id', 'depth', 'host_taxid', 'common_name', 'water_content_soil', 'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude', @@ -286,7 +287,7 @@ def test_sample_template_overview_handler_get_request(self): 'physical_specimen_remaining', 'dna_extracted', 'sample_type', 'collection_timestamp', 'host_subject_id', 'description', 'latitude', - 'longitude', 'scientific_name'], + 'longitude', 'scientific_name']), 'specimen_id_column': None} self.assertEqual(obs, exp) @@ -333,7 +334,7 @@ def test_sample_template_columns_get_req(self): # Test success obs = sample_template_columns_get_req(1, None, user) - exp = [ + exp = sorted([ 'season_environment', 'assigned_from_geo', 'texture', 'taxon_id', 'depth', 'host_taxid', 'common_name', 'water_content_soil', 'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude', @@ -342,7 +343,7 @@ def test_sample_template_columns_get_req(self): 'physical_specimen_location', 'physical_specimen_remaining', 'dna_extracted', 'sample_type', 'collection_timestamp', 'host_subject_id', 'description', 'latitude', 'longitude', - 'scientific_name'] + 'scientific_name']) self.assertEqual(obs, exp) obs = sample_template_columns_get_req(1, 'season_environment', user) @@ -483,11 +484,12 @@ def test_get(self): 'data_types': [], 'user_can_edit': True, 'job': None, - 'download_id': 22, + 'download_id': 23, 'old_files': ['1_19700101-000000.txt'], 'num_samples': 27, 'num_columns': 30, - 'columns': ['season_environment', 'assigned_from_geo', + 'columns': sorted( + ['season_environment', 'assigned_from_geo', 'texture', 'taxon_id', 'depth', 'host_taxid', 'common_name', 'water_content_soil', 'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude', @@ -497,9 +499,9 @@ def test_get(self): 'physical_specimen_remaining', 'dna_extracted', 'sample_type', 'collection_timestamp', 'host_subject_id', 'description', 'latitude', - 'longitude', 'scientific_name'], + 'longitude', 'scientific_name']), 'specimen_id_column': None} - self.assertEqual(obs, exp) + self.assertDictEqual(obs, exp) class TestSampleTemplateColumnsHandler(TestHandlerBase): @@ -509,7 +511,7 @@ def test_get(self): self.assertEqual(response.code, 200) self.assertIsNotNone(response.body) obs = loads(response.body) - exp = {'values': [ + exp = {'values': sorted([ 'season_environment', 'assigned_from_geo', 'texture', 'taxon_id', 'depth', 'host_taxid', 'common_name', 'water_content_soil', 'elevation', 'temp', 'tot_nitro', 'samp_salinity', 'altitude', @@ -518,7 +520,7 @@ def test_get(self): 'physical_specimen_location', 'physical_specimen_remaining', 'dna_extracted', 'sample_type', 'collection_timestamp', 'host_subject_id', 'description', 'latitude', 'longitude', - 'scientific_name']} + 'scientific_name'])} self.assertEqual(obs, exp) diff --git a/scripts/qiita-recover-jobs b/scripts/qiita-recover-jobs index 365709dcf..0adca120c 100644 --- a/scripts/qiita-recover-jobs +++ b/scripts/qiita-recover-jobs @@ -26,7 +26,7 @@ SQL = """SELECT processing_job_id def _submit_jobs(jids_to_recover, recover_type): - # we are gonna split the SLEEP_TIME by CHANCES so we can ctrl-c + # we are going to split the SLEEP_TIME by CHANCES so we can ctrl-c # ... just in case st = int(ceil(SLEEP_TIME/CHANCES)) len_jids_to_recover = len(jids_to_recover)
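The comment cleaned up in this last hunk describes a deliberate pattern in scripts/qiita-recover-jobs: rather than one long sleep, _submit_jobs waits in CHANCES shorter slices so an operator's ctrl-c is honored between slices. A minimal sketch of that arithmetic, with illustrative constants (the script's real SLEEP_TIME and CHANCES are defined outside this hunk):

# Sketch of the sliced wait used by _submit_jobs; the constant values
# here are illustrative only, not the script's.
from math import ceil
from time import sleep

SLEEP_TIME = 180  # assumed total wait, in seconds
CHANCES = 3       # number of slices a ctrl-c can land between

def wait_with_chances():
    # same arithmetic as the script: split the total wait into CHANCES
    # roughly equal chunks, rounding up
    st = int(ceil(SLEEP_TIME / CHANCES))
    for _ in range(CHANCES):
        sleep(st)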