Skip to content

Commit 6080742

Browse files
authored
Merge pull request #2249 from biocore/release-candidate
[DO NOT MERGE] August 24th Release
2 parents 6b59f37 + 7941067 commit 6080742

File tree

30 files changed

+2203
-2012
lines changed

30 files changed

+2203
-2012
lines changed

qiita_db/artifact.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -573,11 +573,12 @@ def delete(cls, artifact_id):
573573
WHERE artifact_id = %s"""
574574
qdb.sql_connection.TRN.add(sql, [artifact_id])
575575

576-
# If the artifact doesn't have parents, we move the files to the
577-
# uploads folder. We also need to nullify the column in the prep
578-
# template table
579-
if not instance.parents:
580-
qdb.util.move_filepaths_to_upload_folder(study.id, filepaths)
576+
# If the artifact doesn't have parents and study is not None (a None
577+
# study presumably means it belongs to an analysis), we move the files
578+
# to the uploads folder. We also need to nullify the column in the prep template table
579+
if not instance.parents and study is not None:
580+
qdb.util.move_filepaths_to_upload_folder(
581+
study.id, filepaths)
581582

582583
sql = """UPDATE qiita.prep_template
583584
SET artifact_id = NULL

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ def extend(self, md_template):
11241124
self.validate(self.columns_restrictions)
11251125
self.generate_files()
11261126

1127-
def update(self, md_template):
1127+
def _update(self, md_template):
11281128
r"""Update values in the template
11291129
11301130
Parameters
@@ -1143,22 +1143,19 @@ def update(self, md_template):
11431143
passed md_template
11441144
"""
11451145
with qdb.sql_connection.TRN:
1146-
# Clean and validate the metadata template given
1147-
new_map = self._clean_validate_template(
1148-
md_template, self.study_id, current_columns=self.categories())
11491146
# Retrieving current metadata
11501147
current_map = self.to_dataframe()
11511148

11521149
# simple validations of sample ids and column names
1153-
samples_diff = set(new_map.index).difference(current_map.index)
1150+
samples_diff = set(md_template.index).difference(current_map.index)
11541151
if samples_diff:
11551152
raise qdb.exceptions.QiitaDBError(
11561153
'The new template differs from what is stored '
11571154
'in database by these samples names: %s'
11581155
% ', '.join(samples_diff))
11591156

1160-
if not set(current_map.columns).issuperset(new_map.columns):
1161-
columns_diff = set(new_map.columns).difference(
1157+
if not set(current_map.columns).issuperset(md_template.columns):
1158+
columns_diff = set(md_template.columns).difference(
11621159
current_map.columns)
11631160
raise qdb.exceptions.QiitaDBError(
11641161
'Some of the columns in your template are not present in '
@@ -1168,15 +1165,16 @@ def update(self, md_template):
11681165

11691166
# In order to speed up some computation, let's compare only the
11701167
# common columns and rows. current_map.columns and
1171-
# current_map.index are supersets of new_map.columns and
1172-
# new_map.index, respectively, so this will not fail
1173-
current_map = current_map[new_map.columns].loc[new_map.index]
1168+
# current_map.index are supersets of md_template.columns and
1169+
# md_template.index, respectively, so this will not fail
1170+
current_map = current_map[
1171+
md_template.columns].loc[md_template.index]
11741172

11751173
# Get the values that we need to change
11761174
# diff_map is a DataFrame that holds boolean values. If a cell is
1177-
# True, means that the new_map is different from the current_map
1178-
# while False means that the cell has the same value
1179-
diff_map = current_map != new_map
1175+
# True, means that the md_template is different from the
1176+
# current_map while False means that the cell has the same value
1177+
diff_map = current_map != md_template
11801178
# ne_stacked holds a MultiIndexed DataFrame in which the first
11811179
# level of indexing is the sample_name and the second one is the
11821180
# columns. We only have 1 column, which holds if that
@@ -1195,8 +1193,8 @@ def update(self, md_template):
11951193
changed.index.names = ['sample_name', 'column']
11961194
# the combination of np.where and boolean indexing produces
11971195
# a numpy array with only the values that actually changed
1198-
# between the current_map and new_map
1199-
changed_to = new_map.values[np.where(diff_map)]
1196+
# between the current_map and md_template
1197+
changed_to = md_template.values[np.where(diff_map)]
12001198

12011199
# to_update is a MultiIndexed DataFrame, in which the index 0 is
12021200
# the samples and the index 1 is the columns, we define these
@@ -1235,12 +1233,57 @@ def update(self, md_template):
12351233
""".format(self._table_name(self._id), sql_eq_cols,
12361234
single_value, sql_cols)
12371235
for sample in samples_to_update:
1238-
sample_vals = [new_map[col][sample] for col in cols_to_update]
1236+
sample_vals = [md_template[col][sample]
1237+
for col in cols_to_update]
12391238
sample_vals.insert(0, sample)
12401239
qdb.sql_connection.TRN.add(sql, sample_vals)
12411240

12421241
qdb.sql_connection.TRN.execute()
12431242

1243+
def update(self, md_template):
1244+
r"""Update values in the template
1245+
1246+
Parameters
1247+
----------
1248+
md_template : DataFrame
1249+
The metadata template file contents indexed by samples ids
1250+
1251+
Raises
1252+
------
1253+
QiitaDBError
1254+
If md_template and db do not have the same sample ids
1255+
If md_template and db do not have the same column headers
1256+
If self.can_be_updated is not True
1257+
QiitaDBWarning
1258+
If there are no differences between the contents of the DB and the
1259+
passed md_template
1260+
"""
1261+
with qdb.sql_connection.TRN:
1262+
# Clean and validate the metadata template given
1263+
new_map = self._clean_validate_template(
1264+
md_template, self.study_id, current_columns=self.categories())
1265+
self._update(new_map)
1266+
self.validate(self.columns_restrictions)
1267+
self.generate_files()
1268+
1269+
def extend_and_update(self, md_template):
1270+
"""Performs the update and extend operations at once
1271+
1272+
Parameters
1273+
----------
1274+
md_template : DataFrame
1275+
The metadata template contents indexed by sample ids
1276+
1277+
See Also
1278+
--------
1279+
update
1280+
extend
1281+
"""
1282+
with qdb.sql_connection.TRN:
1283+
md_template = self._clean_validate_template(
1284+
md_template, self.study_id, current_columns=self.categories())
1285+
self._common_extend_steps(md_template)
1286+
self._update(md_template)
12441287
self.validate(self.columns_restrictions)
12451288
self.generate_files()
12461289

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,8 +1280,7 @@ def test_extend_update(self):
12801280
self.metadata['str_column']['SKB7.640196'] = 'NEW VAL'
12811281

12821282
npt.assert_warns(
1283-
qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata)
1284-
pt.update(self.metadata)
1283+
qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata)
12851284

12861285
sql = "SELECT * FROM qiita.prep_{0}".format(pt.id)
12871286
obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)]

qiita_db/metadata_template/test/test_sample_template.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,8 +1732,8 @@ def test_extend_update(self):
17321732
md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'],
17331733
index=md_ext.index)
17341734

1735-
npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext)
1736-
st.update(md_ext)
1735+
npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update,
1736+
md_ext)
17371737
exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id,
17381738
"%s.Sample3" % st.id, "%s.Sample4" % st.id}
17391739
self.assertEqual(st._get_sample_ids(), exp_sample_ids)

qiita_db/metadata_template/test/test_util.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def test_load_template_to_dataframe_lowercase(self):
149149

150150
def test_load_template_to_dataframe_non_utf8(self):
151151
bad = EXP_SAMPLE_TEMPLATE.replace('Test Sample 2', 'Test Sample\x962')
152-
with self.assertRaises(qdb.exceptions.QiitaDBError):
152+
with self.assertRaises(ValueError):
153153
qdb.metadata_template.util.load_template_to_dataframe(
154154
StringIO(bad))
155155

@@ -387,20 +387,20 @@ def test_get_pgsql_reserved_words(self):
387387

388388
EXP_SAMPLE_TEMPLATE_SPACES_EMPTY_ROW = (
389389
"sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"
390-
"has_physical_specimen\thost_subject_id\tint_column\tlatitude\tlongitude\t"
391-
"physical_location\trequired_sample_info_status\tsample_type\t"
392-
"str_column\n"
393-
"2.Sample1 \t2014-05-29 12:24:51\tTest Sample 1\tTrue\tTrue\t"
390+
"has_physical_specimen\thost_subject_id\tint_column\tlatitude\t"
391+
"longitude\t physical_location\trequired_sample_info_status"
392+
"\tsample_type\tstr_column\n"
393+
" 2.Sample1 \t2014-05-29 12:24:51\tTest Sample 1\tTrue\tTrue\t"
394394
"NotIdentified\t1\t42.42\t41.41\tlocation1\treceived\ttype1\t"
395395
"Value for sample 1\n"
396-
"2.Sample2 \t2014-05-29 12:24:51\t"
396+
" 2.Sample2 \t2014-05-29 12:24:51\t"
397397
"Test Sample 2\tTrue\tTrue\tNotIdentified\t2\t4.2\t1.1\tlocation1\t"
398398
"received\ttype1\tValue for sample 2\n"
399399
"2.Sample3\t2014-05-29 12:24:51\tTest Sample 3\tTrue\t"
400400
"True\tNotIdentified\t3\t4.8\t4.41\tlocation1\treceived\ttype1\t"
401401
"Value for sample 3\n"
402402
"\t\t\t\t\t\t\t\t\t\t\t\t\n"
403-
"\t\t\t\t\t\t\t\t\t\t\t\t\n")
403+
"\t\t\t\t\t\t\t\t\t\t \t\t\n")
404404

405405
EXP_ST_SPACES_EMPTY_COLUMN = (
406406
"sample_name\tcollection_timestamp\tdescription\thas_extracted_data\t"

qiita_db/metadata_template/util.py

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
# -----------------------------------------------------------------------------
88

99
from __future__ import division
10-
from collections import defaultdict
1110
from future.utils import PY3, viewitems
1211
from six import StringIO
12+
from string import printable
1313

1414
import pandas as pd
1515
import numpy as np
@@ -103,7 +103,27 @@ def load_template_to_dataframe(fn, index='sample_name'):
103103
# Load in file lines
104104
holdfile = None
105105
with open_file(fn, mode='U') as f:
106+
errors = {}
106107
holdfile = f.readlines()
108+
# here we flag any char outside string.printable (printable ASCII); these are reported to the user as non UTF-8 chars
109+
for row, line in enumerate(holdfile):
110+
for col, block in enumerate(line.split('\t')):
111+
tblock = ''.join([c for c in block if c in printable])
112+
if len(block) != len(tblock):
113+
tblock = ''.join([c if c in printable else '🐾'
114+
for c in block])
115+
if tblock not in errors:
116+
errors[tblock] = []
117+
errors[tblock].append('(%d, %d)' % (row, col))
118+
if bool(errors):
119+
raise ValueError(
120+
"There are invalid (non UTF-8) characters in your information "
121+
"file. The offending fields and their location (row, column) "
122+
"are listed below, invalid characters are represented using "
123+
"🐾: %s" % '; '.join(
124+
['"%s" = %s' % (k, ', '.join(v))
125+
for k, v in viewitems(errors)]))
126+
107127
if not holdfile:
108128
raise ValueError('Empty file passed!')
109129

@@ -137,7 +157,7 @@ def load_template_to_dataframe(fn, index='sample_name'):
137157
# .strip will remove odd chars, newlines, tabs and multiple
138158
# spaces but we need to read a new line at the end of the
139159
# line(+'\n')
140-
newcols = [d.strip(" \r\x0b\x0c\n") for d in cols]
160+
newcols = [d.strip(" \r\n") for d in cols]
141161

142162
holdfile[pos] = '\t'.join(newcols) + '\n'
143163

@@ -149,34 +169,19 @@ def load_template_to_dataframe(fn, index='sample_name'):
149169
# comment:
150170
# using the tab character as "comment" we remove rows that are
151171
# constituted only by delimiters i. e. empty rows.
152-
try:
153-
template = pd.read_csv(
154-
StringIO(''.join(holdfile)),
155-
sep='\t',
156-
dtype=str,
157-
encoding='utf-8',
158-
infer_datetime_format=False,
159-
keep_default_na=False,
160-
index_col=False,
161-
comment='\t',
162-
converters={index: lambda x: str(x).strip()})
163-
# remove newlines and tabs from fields
164-
template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
165-
regex=True, inplace=True)
166-
except UnicodeDecodeError:
167-
# Find row number and col number for utf-8 encoding errors
168-
headers = holdfile[0].strip().split('\t')
169-
errors = defaultdict(list)
170-
for row, line in enumerate(holdfile, 1):
171-
for col, cell in enumerate(line.split('\t')):
172-
try:
173-
cell.encode('utf-8')
174-
except UnicodeError:
175-
errors[headers[col]].append(row)
176-
lines = ['%s: row(s) %s' % (header, ', '.join(map(str, rows)))
177-
for header, rows in viewitems(errors)]
178-
raise qdb.exceptions.QiitaDBError(
179-
'Non UTF-8 characters found in columns:\n' + '\n'.join(lines))
172+
template = pd.read_csv(
173+
StringIO(''.join(holdfile)),
174+
sep='\t',
175+
dtype=str,
176+
encoding='utf-8',
177+
infer_datetime_format=False,
178+
keep_default_na=False,
179+
index_col=False,
180+
comment='\t',
181+
converters={index: lambda x: str(x).strip()})
182+
# remove newlines and tabs from fields
183+
template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
184+
regex=True, inplace=True)
180185

181186
initial_columns = set(template.columns)
182187

qiita_db/private.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,27 @@ def build_analysis_files(job):
4747
j.submit()
4848
sleep(1)
4949

50+
# The validator jobs no longer finish the job automatically so we need
51+
# to release the validators here
52+
job.release_validators()
5053

51-
TASK_DICT = {'build_analysis_files': build_analysis_files}
54+
55+
def release_validators(job):
56+
"""Waits until all the validators of a job are completed
57+
58+
Parameters
59+
----------
60+
job : qiita_db.processing_job.ProcessingJob
61+
The processing job with the information of the parent job
62+
"""
63+
with qdb.sql_connection.TRN:
64+
qdb.processing_job.ProcessingJob(
65+
job.parameters.values['job']).release_validators()
66+
job._set_status('success')
67+
68+
69+
TASK_DICT = {'build_analysis_files': build_analysis_files,
70+
'release_validators': release_validators}
5271

5372

5473
def private_task(job_id):

0 commit comments

Comments
 (0)