Skip to content

Fixes 1880 #2247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 59 additions & 16 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ def extend(self, md_template):
self.validate(self.columns_restrictions)
self.generate_files()

def update(self, md_template):
def _update(self, md_template):
r"""Update values in the template

Parameters
Expand All @@ -1143,22 +1143,19 @@ def update(self, md_template):
passed md_template
"""
with qdb.sql_connection.TRN:
# Clean and validate the metadata template given
new_map = self._clean_validate_template(
md_template, self.study_id, current_columns=self.categories())
# Retrieving current metadata
current_map = self.to_dataframe()

# simple validations of sample ids and column names
samples_diff = set(new_map.index).difference(current_map.index)
samples_diff = set(md_template.index).difference(current_map.index)
if samples_diff:
raise qdb.exceptions.QiitaDBError(
'The new template differs from what is stored '
'in database by these samples names: %s'
% ', '.join(samples_diff))

if not set(current_map.columns).issuperset(new_map.columns):
columns_diff = set(new_map.columns).difference(
if not set(current_map.columns).issuperset(md_template.columns):
columns_diff = set(md_template.columns).difference(
current_map.columns)
raise qdb.exceptions.QiitaDBError(
'Some of the columns in your template are not present in '
Expand All @@ -1168,15 +1165,16 @@ def update(self, md_template):

# In order to speed up some computation, let's compare only the
# common columns and rows. current_map.columns and
# current_map.index are supersets of new_map.columns and
# new_map.index, respectivelly, so this will not fail
current_map = current_map[new_map.columns].loc[new_map.index]
# current_map.index are supersets of md_template.columns and
# md_template.index, respectivelly, so this will not fail
current_map = current_map[
md_template.columns].loc[md_template.index]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to do this way instead of current_map = current_map.loc[md_template.index, md_template.columns]?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. I know that sometimes one option is creating a view while the other one returning an actual pandas DataFrame object, and that can raise Warning in some operations. I don't think it actually matters here, but I wanted to minimize code modifications (I'm actually just moving code around in this PR to be able to reuse code easily).


# Get the values that we need to change
# diff_map is a DataFrame that hold boolean values. If a cell is
# True, means that the new_map is different from the current_map
# while False means that the cell has the same value
diff_map = current_map != new_map
# True, means that the md_template is different from the
# current_map while False means that the cell has the same value
diff_map = current_map != md_template
# ne_stacked holds a MultiIndexed DataFrame in which the first
# level of indexing is the sample_name and the second one is the
# columns. We only have 1 column, which holds if that
Expand All @@ -1195,8 +1193,8 @@ def update(self, md_template):
changed.index.names = ['sample_name', 'column']
# the combination of np.where and boolean indexing produces
# a numpy array with only the values that actually changed
# between the current_map and new_map
changed_to = new_map.values[np.where(diff_map)]
# between the current_map and md_template
changed_to = md_template.values[np.where(diff_map)]

# to_update is a MultiIndexed DataFrame, in which the index 0 is
# the samples and the index 1 is the columns, we define these
Expand Down Expand Up @@ -1235,12 +1233,57 @@ def update(self, md_template):
""".format(self._table_name(self._id), sql_eq_cols,
single_value, sql_cols)
for sample in samples_to_update:
sample_vals = [new_map[col][sample] for col in cols_to_update]
sample_vals = [md_template[col][sample]
for col in cols_to_update]
sample_vals.insert(0, sample)
qdb.sql_connection.TRN.add(sql, sample_vals)

qdb.sql_connection.TRN.execute()

def update(self, md_template):
r"""Update values in the template

Parameters
----------
md_template : DataFrame
The metadata template file contents indexed by samples ids

Raises
------
QiitaDBError
If md_template and db do not have the same sample ids
If md_template and db do not have the same column headers
If self.can_be_updated is not True
QiitaDBWarning
If there are no differences between the contents of the DB and the
passed md_template
"""
with qdb.sql_connection.TRN:
# Clean and validate the metadata template given
new_map = self._clean_validate_template(
md_template, self.study_id, current_columns=self.categories())
self._update(new_map)
self.validate(self.columns_restrictions)
self.generate_files()

def extend_and_update(self, md_template):
"""Performs the update and extend operations at once

Parameters
----------
md_template : DataFrame
The metadata template contents indexed by sample ids

See Also
--------
update
extend
"""
with qdb.sql_connection.TRN:
md_template = self._clean_validate_template(
md_template, self.study_id, current_columns=self.categories())
self._common_extend_steps(md_template)
self._update(md_template)
self.validate(self.columns_restrictions)
self.generate_files()

Expand Down
3 changes: 1 addition & 2 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1280,8 +1280,7 @@ def test_extend_update(self):
self.metadata['str_column']['SKB7.640196'] = 'NEW VAL'

npt.assert_warns(
qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata)
pt.update(self.metadata)
qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata)

sql = "SELECT * FROM qiita.prep_{0}".format(pt.id)
obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)]
Expand Down
4 changes: 2 additions & 2 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1732,8 +1732,8 @@ def test_extend_update(self):
md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'],
index=md_ext.index)

npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext)
st.update(md_ext)
npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update,
md_ext)
exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id,
"%s.Sample3" % st.id, "%s.Sample4" % st.id}
self.assertEqual(st._get_sample_ids(), exp_sample_ids)
Expand Down
3 changes: 1 addition & 2 deletions qiita_ware/dispatchable.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ def update_sample_template(study_id, fp):
# deleting previous uploads and inserting new one
st = SampleTemplate(study_id)
df = load_template_to_dataframe(fp)
st.extend(df)
st.update(df)
st.extend_and_update(df)
remove(fp)

# join all the warning messages into one. Note that this info
Expand Down