Skip to content

[DO NOT MERGE] August 24th Release #2249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Aug 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
79b232f
fix #2229
antgonza Aug 16, 2017
2d48a65
cleaning html
antgonza Aug 16, 2017
ee0deb4
fix #2225 and partial #2224
antgonza Aug 16, 2017
4dc760b
fix errors
antgonza Aug 16, 2017
868a9e0
fix #2224 and fix #2223
antgonza Aug 17, 2017
e5ff2c8
fixing comments/errors
antgonza Aug 17, 2017
24b7d8e
Remove race condition - Fixes 2143 (#2203)
josenavas Aug 17, 2017
51ff4f9
fixing error
antgonza Aug 17, 2017
adf98a3
fix #2216
antgonza Aug 17, 2017
4af4656
Merge pull request #2243 from antgonza/fix-2216
adswafford Aug 17, 2017
a450aeb
Merge pull request #2242 from antgonza/fix-2225
adswafford Aug 18, 2017
db0a4b6
Merge pull request #2241 from antgonza/fix-2229
adswafford Aug 18, 2017
899268a
delete-root-analysis-artifact
antgonza Aug 18, 2017
3e17876
fix errors
antgonza Aug 18, 2017
a369cad
Merge pull request #2244 from antgonza/delete-root-analysis-artifact
josenavas Aug 18, 2017
773c8da
fix #2210
antgonza Aug 20, 2017
4bf4e97
fix error
antgonza Aug 20, 2017
a5836b1
fixing test
antgonza Aug 21, 2017
c925756
fix errors
antgonza Aug 21, 2017
19b4fe3
fixes 1880
josenavas Aug 21, 2017
d1bbe46
Removing commented code
josenavas Aug 21, 2017
63ccfd9
Fixing test
josenavas Aug 21, 2017
acd27f5
fix #2211 (#2240)
antgonza Aug 21, 2017
eae20cc
DEBUG: adding prints to see the actual error
josenavas Aug 21, 2017
b7e2cc9
Fixing test and remove debug code
josenavas Aug 21, 2017
39b2b84
Merge pull request #2247 from josenavas/fix-1880
tanaes Aug 21, 2017
f94af55
Merge pull request #2246 from antgonza/fix-2210
tanaes Aug 21, 2017
7941067
Merge pull request #2248 from biocore/dev
josenavas Aug 21, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions qiita_db/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,11 +573,12 @@ def delete(cls, artifact_id):
WHERE artifact_id = %s"""
qdb.sql_connection.TRN.add(sql, [artifact_id])

# If the artifact doesn't have parents, we move the files to the
# uploads folder. We also need to nullify the column in the prep
# template table
if not instance.parents:
qdb.util.move_filepaths_to_upload_folder(study.id, filepaths)
# If the artifact doesn't have parents and study is not None (study
# is None when the artifact belongs to an analysis), we move the
# files to the uploads folder. We also need to nullify the column in
# the prep template table
if not instance.parents and study is not None:
qdb.util.move_filepaths_to_upload_folder(
study.id, filepaths)

sql = """UPDATE qiita.prep_template
SET artifact_id = NULL
Expand Down
75 changes: 59 additions & 16 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ def extend(self, md_template):
self.validate(self.columns_restrictions)
self.generate_files()

def update(self, md_template):
def _update(self, md_template):
r"""Update values in the template

Parameters
Expand All @@ -1143,22 +1143,19 @@ def update(self, md_template):
passed md_template
"""
with qdb.sql_connection.TRN:
# Clean and validate the metadata template given
new_map = self._clean_validate_template(
md_template, self.study_id, current_columns=self.categories())
# Retrieving current metadata
current_map = self.to_dataframe()

# simple validations of sample ids and column names
samples_diff = set(new_map.index).difference(current_map.index)
samples_diff = set(md_template.index).difference(current_map.index)
if samples_diff:
raise qdb.exceptions.QiitaDBError(
'The new template differs from what is stored '
'in database by these samples names: %s'
% ', '.join(samples_diff))

if not set(current_map.columns).issuperset(new_map.columns):
columns_diff = set(new_map.columns).difference(
if not set(current_map.columns).issuperset(md_template.columns):
columns_diff = set(md_template.columns).difference(
current_map.columns)
raise qdb.exceptions.QiitaDBError(
'Some of the columns in your template are not present in '
Expand All @@ -1168,15 +1165,16 @@ def update(self, md_template):

# In order to speed up some computation, let's compare only the
# common columns and rows. current_map.columns and
# current_map.index are supersets of new_map.columns and
# new_map.index, respectivelly, so this will not fail
current_map = current_map[new_map.columns].loc[new_map.index]
# current_map.index are supersets of md_template.columns and
# md_template.index, respectively, so this will not fail
current_map = current_map[
md_template.columns].loc[md_template.index]

# Get the values that we need to change
# diff_map is a DataFrame that holds boolean values. If a cell is
# True, means that the new_map is different from the current_map
# while False means that the cell has the same value
diff_map = current_map != new_map
# True, means that the md_template is different from the
# current_map while False means that the cell has the same value
diff_map = current_map != md_template
# ne_stacked holds a MultiIndexed DataFrame in which the first
# level of indexing is the sample_name and the second one is the
# columns. We only have 1 column, which holds if that
Expand All @@ -1195,8 +1193,8 @@ def update(self, md_template):
changed.index.names = ['sample_name', 'column']
# the combination of np.where and boolean indexing produces
# a numpy array with only the values that actually changed
# between the current_map and new_map
changed_to = new_map.values[np.where(diff_map)]
# between the current_map and md_template
changed_to = md_template.values[np.where(diff_map)]

# to_update is a MultiIndexed DataFrame, in which the index 0 is
# the samples and the index 1 is the columns, we define these
Expand Down Expand Up @@ -1235,12 +1233,57 @@ def update(self, md_template):
""".format(self._table_name(self._id), sql_eq_cols,
single_value, sql_cols)
for sample in samples_to_update:
sample_vals = [new_map[col][sample] for col in cols_to_update]
sample_vals = [md_template[col][sample]
for col in cols_to_update]
sample_vals.insert(0, sample)
qdb.sql_connection.TRN.add(sql, sample_vals)

qdb.sql_connection.TRN.execute()

def update(self, md_template):
    r"""Clean, validate and apply new values to the stored template

    The given template is first passed through
    ``_clean_validate_template``; the cleaned result is handed to
    ``_update``. Afterwards the column restrictions are validated and the
    template files are regenerated. Everything runs inside a single
    ``qdb.sql_connection.TRN`` context.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template file contents indexed by samples ids

    Raises
    ------
    QiitaDBError
        If md_template has sample ids that are not present in the DB
        If md_template has column headers that are not present in the DB
        The previous documentation also listed "If self.can_be_updated is
        not True"; that check is not visible in this method - confirm
    QiitaDBWarning
        If there are no differences between the contents of the DB and the
        passed md_template (as documented for ``_update``)
    """
    with qdb.sql_connection.TRN:
        # Clean and validate the metadata template given
        cleaned = self._clean_validate_template(
            md_template,
            self.study_id,
            current_columns=self.categories())

        # Store the values that changed
        self._update(cleaned)

        # Check the column restrictions and rebuild the files
        self.validate(self.columns_restrictions)
        self.generate_files()

def extend_and_update(self, md_template):
    """Extend the template and update its values in a single transaction

    The template is cleaned and validated once, then passed to
    ``_common_extend_steps`` followed by ``_update``. Validation of the
    column restrictions and file generation happen a single time at the
    end.

    Parameters
    ----------
    md_template : DataFrame
        The metadata template contents indexed by sample ids

    See Also
    --------
    update
    extend
    """
    with qdb.sql_connection.TRN:
        # Clean and validate once; both steps below use the same result
        cleaned = self._clean_validate_template(
            md_template,
            self.study_id,
            current_columns=self.categories())

        # Extend first, then update the values
        self._common_extend_steps(cleaned)
        self._update(cleaned)

        self.validate(self.columns_restrictions)
        self.generate_files()

Expand Down
3 changes: 1 addition & 2 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1280,8 +1280,7 @@ def test_extend_update(self):
self.metadata['str_column']['SKB7.640196'] = 'NEW VAL'

npt.assert_warns(
qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata)
pt.update(self.metadata)
qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata)

sql = "SELECT * FROM qiita.prep_{0}".format(pt.id)
obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)]
Expand Down
4 changes: 2 additions & 2 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1732,8 +1732,8 @@ def test_extend_update(self):
md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'],
index=md_ext.index)

npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext)
st.update(md_ext)
npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update,
md_ext)
exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id,
"%s.Sample3" % st.id, "%s.Sample4" % st.id}
self.assertEqual(st._get_sample_ids(), exp_sample_ids)
Expand Down
14 changes: 7 additions & 7 deletions qiita_db/metadata_template/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def test_load_template_to_dataframe_lowercase(self):

def test_load_template_to_dataframe_non_utf8(self):
bad = EXP_SAMPLE_TEMPLATE.replace('Test Sample 2', 'Test Sample\x962')
with self.assertRaises(qdb.exceptions.QiitaDBError):
with self.assertRaises(ValueError):
qdb.metadata_template.util.load_template_to_dataframe(
StringIO(bad))

Expand Down Expand Up @@ -387,20 +387,20 @@ def test_get_pgsql_reserved_words(self):

EXP_SAMPLE_TEMPLATE_SPACES_EMPTY_ROW = (
"sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
"has_physical_specimen\thost_subject_id\tint_column\tlatitude\tlongitude\t"
"physical_location\trequired_sample_info_status\tsample_type\t"
"str_column\n"
"2.Sample1 \t2014-05-29 12:24:51\tTest Sample 1\tTrue\tTrue\t"
"has_physical_specimen\thost_subject_id\tint_column\tlatitude\t"
"longitude\t physical_location\trequired_sample_info_status"
"\tsample_type\tstr_column\n"
" 2.Sample1 \t2014-05-29 12:24:51\tTest Sample 1\tTrue\tTrue\t"
"NotIdentified\t1\t42.42\t41.41\tlocation1\treceived\ttype1\t"
"Value for sample 1\n"
"2.Sample2 \t2014-05-29 12:24:51\t"
" 2.Sample2 \t2014-05-29 12:24:51\t"
"Test Sample 2\tTrue\tTrue\tNotIdentified\t2\t4.2\t1.1\tlocation1\t"
"received\ttype1\tValue for sample 2\n"
"2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t"
"True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t"
"Value for sample 3\n"
"\t\t\t\t\t\t\t\t\t\t\t\t\n"
"\t\t\t\t\t\t\t\t\t\t\t\t\n")
"\t\t\t\t\t\t\t\t\t\t \t\t\n")

EXP_ST_SPACES_EMPTY_COLUMN = (
"sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
Expand Down
65 changes: 35 additions & 30 deletions qiita_db/metadata_template/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
# -----------------------------------------------------------------------------

from __future__ import division
from collections import defaultdict
from future.utils import PY3, viewitems
from six import StringIO
from string import printable

import pandas as pd
import numpy as np
Expand Down Expand Up @@ -103,7 +103,27 @@ def load_template_to_dataframe(fn, index='sample_name'):
# Load in file lines
holdfile = None
with open_file(fn, mode='U') as f:
errors = {}
holdfile = f.readlines()
# here we are checking for chars that are not in string.printable,
# i.e. non-ASCII or control chars (not only invalid UTF-8)
for row, line in enumerate(holdfile):
for col, block in enumerate(line.split('\t')):
tblock = ''.join([c for c in block if c in printable])
if len(block) != len(tblock):
tblock = ''.join([c if c in printable else '🐾'
for c in block])
if tblock not in errors:
errors[tblock] = []
errors[tblock].append('(%d, %d)' % (row, col))
if bool(errors):
raise ValueError(
"There are invalid (non UTF-8) characters in your information "
"file. The offending fields and their location (row, column) "
"are listed below, invalid characters are represented using "
"🐾: %s" % '; '.join(
['"%s" = %s' % (k, ', '.join(v))
for k, v in viewitems(errors)]))

if not holdfile:
raise ValueError('Empty file passed!')

Expand Down Expand Up @@ -137,7 +157,7 @@ def load_template_to_dataframe(fn, index='sample_name'):
# .strip will remove odd chars, newlines, tabs and multiple
# spaces but we need to read a new line at the end of the
# line(+'\n')
newcols = [d.strip(" \r\x0b\x0c\n") for d in cols]
newcols = [d.strip(" \r\n") for d in cols]

holdfile[pos] = '\t'.join(newcols) + '\n'

Expand All @@ -149,34 +169,19 @@ def load_template_to_dataframe(fn, index='sample_name'):
# comment:
# using the tab character as "comment" we remove rows that are
# constituted only by delimiters i. e. empty rows.
try:
template = pd.read_csv(
StringIO(''.join(holdfile)),
sep='\t',
dtype=str,
encoding='utf-8',
infer_datetime_format=False,
keep_default_na=False,
index_col=False,
comment='\t',
converters={index: lambda x: str(x).strip()})
# remove newlines and tabs from fields
template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
regex=True, inplace=True)
except UnicodeDecodeError:
# Find row number and col number for utf-8 encoding errors
headers = holdfile[0].strip().split('\t')
errors = defaultdict(list)
for row, line in enumerate(holdfile, 1):
for col, cell in enumerate(line.split('\t')):
try:
cell.encode('utf-8')
except UnicodeError:
errors[headers[col]].append(row)
lines = ['%s: row(s) %s' % (header, ', '.join(map(str, rows)))
for header, rows in viewitems(errors)]
raise qdb.exceptions.QiitaDBError(
'Non UTF-8 characters found in columns:\n' + '\n'.join(lines))
template = pd.read_csv(
StringIO(''.join(holdfile)),
sep='\t',
dtype=str,
encoding='utf-8',
infer_datetime_format=False,
keep_default_na=False,
index_col=False,
comment='\t',
converters={index: lambda x: str(x).strip()})
# remove newlines and tabs from fields
template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
regex=True, inplace=True)

initial_columns = set(template.columns)

Expand Down
21 changes: 20 additions & 1 deletion qiita_db/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,27 @@ def build_analysis_files(job):
j.submit()
sleep(1)

# The validator jobs no longer finish the job automatically so we need
# to release the validators here
job.release_validators()

TASK_DICT = {'build_analysis_files': build_analysis_files}

def release_validators(job):
    """Release the validators of the referenced job and mark `job` as done

    Parameters
    ----------
    job : qiita_db.processing_job.ProcessingJob
        The processing job whose 'job' parameter identifies the job
        (the parent job) whose validators are released
    """
    with qdb.sql_connection.TRN:
        # The 'job' parameter holds what is needed to build the other job
        parent_id = job.parameters.values['job']
        parent = qdb.processing_job.ProcessingJob(parent_id)
        parent.release_validators()

        # Only reached if the release did not raise
        job._set_status('success')


TASK_DICT = {'build_analysis_files': build_analysis_files,
'release_validators': release_validators}


def private_task(job_id):
Expand Down
Loading