From 19b4fe35a38675148fb8bcd67e4483c8c67760ee Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 21 Aug 2017 08:51:51 -0700 Subject: [PATCH 1/2] fixes 1880 --- .../base_metadata_template.py | 75 +++++++++++--- .../test/test_prep_template.py | 3 +- .../test/test_sample_template.py | 98 ++++++++++++++++++- qiita_ware/dispatchable.py | 3 +- 4 files changed, 157 insertions(+), 22 deletions(-) diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py index 502a31985..6ffcb4b61 100644 --- a/qiita_db/metadata_template/base_metadata_template.py +++ b/qiita_db/metadata_template/base_metadata_template.py @@ -1124,7 +1124,7 @@ def extend(self, md_template): self.validate(self.columns_restrictions) self.generate_files() - def update(self, md_template): + def _update(self, md_template): r"""Update values in the template Parameters @@ -1143,22 +1143,19 @@ def update(self, md_template): passed md_template """ with qdb.sql_connection.TRN: - # Clean and validate the metadata template given - new_map = self._clean_validate_template( - md_template, self.study_id, current_columns=self.categories()) # Retrieving current metadata current_map = self.to_dataframe() # simple validations of sample ids and column names - samples_diff = set(new_map.index).difference(current_map.index) + samples_diff = set(md_template.index).difference(current_map.index) if samples_diff: raise qdb.exceptions.QiitaDBError( 'The new template differs from what is stored ' 'in database by these samples names: %s' % ', '.join(samples_diff)) - if not set(current_map.columns).issuperset(new_map.columns): - columns_diff = set(new_map.columns).difference( + if not set(current_map.columns).issuperset(md_template.columns): + columns_diff = set(md_template.columns).difference( current_map.columns) raise qdb.exceptions.QiitaDBError( 'Some of the columns in your template are not present in ' @@ -1168,15 +1165,16 @@ def update(self, md_template): # In order to speed up some computation, let's compare only the # common columns and rows. current_map.columns and - # current_map.index are supersets of new_map.columns and - # new_map.index, respectivelly, so this will not fail - current_map = current_map[new_map.columns].loc[new_map.index] + # current_map.index are supersets of md_template.columns and + # md_template.index, respectivelly, so this will not fail + current_map = current_map[ + md_template.columns].loc[md_template.index] # Get the values that we need to change # diff_map is a DataFrame that hold boolean values. If a cell is - # True, means that the new_map is different from the current_map - # while False means that the cell has the same value - diff_map = current_map != new_map + # True, means that the md_template is different from the + # current_map while False means that the cell has the same value + diff_map = current_map != md_template # ne_stacked holds a MultiIndexed DataFrame in which the first # level of indexing is the sample_name and the second one is the # columns. We only have 1 column, which holds if that @@ -1195,8 +1193,8 @@ def update(self, md_template): changed.index.names = ['sample_name', 'column'] # the combination of np.where and boolean indexing produces # a numpy array with only the values that actually changed - # between the current_map and new_map - changed_to = new_map.values[np.where(diff_map)] + # between the current_map and md_template + changed_to = md_template.values[np.where(diff_map)] # to_update is a MultiIndexed DataFrame, in which the index 0 is # the samples and the index 1 is the columns, we define these @@ -1235,12 +1233,57 @@ def update(self, md_template): """.format(self._table_name(self._id), sql_eq_cols, single_value, sql_cols) for sample in samples_to_update: - sample_vals = [new_map[col][sample] for col in cols_to_update] + sample_vals = [md_template[col][sample] + for col in cols_to_update] sample_vals.insert(0, sample) qdb.sql_connection.TRN.add(sql, sample_vals) qdb.sql_connection.TRN.execute() + def update(self, md_template): + r"""Update values in the template + + Parameters + ---------- + md_template : DataFrame + The metadata template file contents indexed by samples ids + + Raises + ------ + QiitaDBError + If md_template and db do not have the same sample ids + If md_template and db do not have the same column headers + If self.can_be_updated is not True + QiitaDBWarning + If there are no differences between the contents of the DB and the + passed md_template + """ + with qdb.sql_connection.TRN: + # Clean and validate the metadata template given + new_map = self._clean_validate_template( + md_template, self.study_id, current_columns=self.categories()) + self._update(new_map) + self.validate(self.columns_restrictions) + self.generate_files() + + def extend_and_update(self, md_template): + """Performs the update and extend operations at once + + Parameters + ---------- + md_template : DataFrame + The metadata template contents indexed by sample ids + + See Also + -------- + update + extend + """ + with qdb.sql_connection.TRN: + md_template = self._clean_validate_template( + md_template, self.study_id, current_columns=self.categories()) + self._common_extend_steps(md_template) + self._update(md_template) self.validate(self.columns_restrictions) self.generate_files() diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index 82d72a79a..f0d50faae 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -1280,8 +1280,7 @@ def test_extend_update(self): self.metadata['str_column']['SKB7.640196'] = 'NEW VAL' npt.assert_warns( - qdb.exceptions.QiitaDBWarning, pt.extend, self.metadata) - pt.update(self.metadata) + qdb.exceptions.QiitaDBWarning, pt.extend_and_update, self.metadata) sql = "SELECT * FROM qiita.prep_{0}".format(pt.id) obs = [dict(o) for o in self.conn_handler.execute_fetchall(sql)] diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index b1ae10680..caef2ff2e 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -1732,8 +1732,8 @@ def test_extend_update(self): md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'], index=md_ext.index) - npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext) - st.update(md_ext) + npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend_and_update, + md_ext) exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, "%s.Sample3" % st.id, "%s.Sample4" % st.id} self.assertEqual(st._get_sample_ids(), exp_sample_ids) @@ -1800,6 +1800,100 @@ def test_extend_update(self): for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) + # def test_extend_and_update(self): + # st = qdb.metadata_template.sample_template.SampleTemplate.create( + # self.metadata, self.new_study) + # self.metadata_dict['Sample4'] = { + # 'physical_specimen_location': 'location1', + # 'physical_specimen_remaining': 'true', + # 'dna_extracted': 'true', + # 'sample_type': 'type1', + # 'collection_timestamp': '2014-05-29 12:24:15', + # 'host_subject_id': 'NotIdentified', + # 'Description': 'Test Sample 4', + # 'latitude': '42.42', + # 'longitude': '41.41', + # 'taxon_id': '9606', + # 'scientific_name': 'homo sapiens'} + # + # # Change a couple of values on the existent samples to test that + # # they actually change + # self.metadata_dict['Sample1']['Description'] = 'Changed' + # self.metadata_dict['Sample2']['dna_extracted'] = 'Changed dynamic' + # + # md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient='index', + # dtpye=str) + # md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'], + # index=md_ext.index) + # + # npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext) + # + # exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, + # "%s.Sample3" % st.id, "%s.Sample4" % st.id} + # self.assertEqual(st._get_sample_ids(), exp_sample_ids) + # self.assertEqual(len(st), 4) + # exp_categories = {'collection_timestamp', 'description', + # 'dna_extracted', 'host_subject_id', 'latitude', + # 'longitude', 'physical_specimen_location', + # 'physical_specimen_remaining', 'sample_type', + # 'scientific_name', 'taxon_id', 'tot_nitro'} + # self.assertItemsEqual(st.categories(), exp_categories) + # exp_dict = { + # "%s.Sample1" % st.id: { + # 'collection_timestamp': '2014-05-29 12:24:15', + # 'description': "Changed", + # 'dna_extracted': 'true', + # 'host_subject_id': "NotIdentified", + # 'latitude': '42.42', + # 'longitude': '41.41', + # 'physical_specimen_location': "location1", + # 'physical_specimen_remaining': 'true', + # 'sample_type': "type1", + # 'taxon_id': '9606', + # 'scientific_name': 'homo sapiens', + # 'tot_nitro': 'val1'}, + # "%s.Sample2" % st.id: { + # 'collection_timestamp': '2014-05-29 12:24:15', + # 'description': "Test Sample 2", + # 'dna_extracted': 'Changed dynamic', + # 'host_subject_id': "NotIdentified", + # 'latitude': '4.2', + # 'longitude': '1.1', + # 'physical_specimen_location': "location1", + # 'physical_specimen_remaining': 'true', + # 'sample_type': "type1", + # 'taxon_id': '9606', + # 'scientific_name': 'homo sapiens', + # 'tot_nitro': 'val2'}, + # "%s.Sample3" % st.id: { + # 'collection_timestamp': '2014-05-29 12:24:15', + # 'description': "Test Sample 3", + # 'dna_extracted': 'true', + # 'host_subject_id': "NotIdentified", + # 'latitude': '4.8', + # 'longitude': '4.41', + # 'physical_specimen_location': "location1", + # 'physical_specimen_remaining': 'true', + # 'sample_type': "type1", + # 'taxon_id': '9606', + # 'scientific_name': 'homo sapiens', + # 'tot_nitro': 'val3'}, + # '%s.Sample4' % st.id: { + # 'physical_specimen_location': 'location1', + # 'physical_specimen_remaining': 'true', + # 'dna_extracted': 'true', + # 'sample_type': 'type1', + # 'collection_timestamp': '2014-05-29 12:24:15', + # 'host_subject_id': 'NotIdentified', + # 'description': 'Test Sample 4', + # 'latitude': '42.42', + # 'longitude': '41.41', + # 'taxon_id': '9606', + # 'scientific_name': 'homo sapiens', + # 'tot_nitro': 'val4'}} + # for s_id in exp_sample_ids: + # self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) + def test_to_dataframe(self): st = qdb.metadata_template.sample_template.SampleTemplate.create( self.metadata, self.new_study) diff --git a/qiita_ware/dispatchable.py b/qiita_ware/dispatchable.py index 2aaafaa8b..c111fa6b9 100644 --- a/qiita_ware/dispatchable.py +++ b/qiita_ware/dispatchable.py @@ -199,8 +199,7 @@ def update_sample_template(study_id, fp): # deleting previous uploads and inserting new one st = SampleTemplate(study_id) df = load_template_to_dataframe(fp) - st.extend(df) - st.update(df) + st.extend_and_update(df) remove(fp) # join all the warning messages into one. Note that this info From d1bbe4623b64e1930cc69f62e9c3e233eb91f229 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Mon, 21 Aug 2017 08:53:56 -0700 Subject: [PATCH 2/2] Removing commented code --- .../test/test_sample_template.py | 94 ------------------- 1 file changed, 94 deletions(-) diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index caef2ff2e..2cdc9a27a 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -1800,100 +1800,6 @@ def test_extend_update(self): for s_id in exp_sample_ids: self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) - # def test_extend_and_update(self): - # st = qdb.metadata_template.sample_template.SampleTemplate.create( - # self.metadata, self.new_study) - # self.metadata_dict['Sample4'] = { - # 'physical_specimen_location': 'location1', - # 'physical_specimen_remaining': 'true', - # 'dna_extracted': 'true', - # 'sample_type': 'type1', - # 'collection_timestamp': '2014-05-29 12:24:15', - # 'host_subject_id': 'NotIdentified', - # 'Description': 'Test Sample 4', - # 'latitude': '42.42', - # 'longitude': '41.41', - # 'taxon_id': '9606', - # 'scientific_name': 'homo sapiens'} - # - # # Change a couple of values on the existent samples to test that - # # they actually change - # self.metadata_dict['Sample1']['Description'] = 'Changed' - # self.metadata_dict['Sample2']['dna_extracted'] = 'Changed dynamic' - # - # md_ext = pd.DataFrame.from_dict(self.metadata_dict, orient='index', - # dtpye=str) - # md_ext['TOT_NITRO'] = pd.Series(['val1', 'val2', 'val3', 'val4'], - # index=md_ext.index) - # - # npt.assert_warns(qdb.exceptions.QiitaDBWarning, st.extend, md_ext) - # - # exp_sample_ids = {"%s.Sample1" % st.id, "%s.Sample2" % st.id, - # "%s.Sample3" % st.id, "%s.Sample4" % st.id} - # self.assertEqual(st._get_sample_ids(), exp_sample_ids) - # self.assertEqual(len(st), 4) - # exp_categories = {'collection_timestamp', 'description', - # 'dna_extracted', 'host_subject_id', 'latitude', - # 'longitude', 'physical_specimen_location', - # 'physical_specimen_remaining', 'sample_type', - # 'scientific_name', 'taxon_id', 'tot_nitro'} - # self.assertItemsEqual(st.categories(), exp_categories) - # exp_dict = { - # "%s.Sample1" % st.id: { - # 'collection_timestamp': '2014-05-29 12:24:15', - # 'description': "Changed", - # 'dna_extracted': 'true', - # 'host_subject_id': "NotIdentified", - # 'latitude': '42.42', - # 'longitude': '41.41', - # 'physical_specimen_location': "location1", - # 'physical_specimen_remaining': 'true', - # 'sample_type': "type1", - # 'taxon_id': '9606', - # 'scientific_name': 'homo sapiens', - # 'tot_nitro': 'val1'}, - # "%s.Sample2" % st.id: { - # 'collection_timestamp': '2014-05-29 12:24:15', - # 'description': "Test Sample 2", - # 'dna_extracted': 'Changed dynamic', - # 'host_subject_id': "NotIdentified", - # 'latitude': '4.2', - # 'longitude': '1.1', - # 'physical_specimen_location': "location1", - # 'physical_specimen_remaining': 'true', - # 'sample_type': "type1", - # 'taxon_id': '9606', - # 'scientific_name': 'homo sapiens', - # 'tot_nitro': 'val2'}, - # "%s.Sample3" % st.id: { - # 'collection_timestamp': '2014-05-29 12:24:15', - # 'description': "Test Sample 3", - # 'dna_extracted': 'true', - # 'host_subject_id': "NotIdentified", - # 'latitude': '4.8', - # 'longitude': '4.41', - # 'physical_specimen_location': "location1", - # 'physical_specimen_remaining': 'true', - # 'sample_type': "type1", - # 'taxon_id': '9606', - # 'scientific_name': 'homo sapiens', - # 'tot_nitro': 'val3'}, - # '%s.Sample4' % st.id: { - # 'physical_specimen_location': 'location1', - # 'physical_specimen_remaining': 'true', - # 'dna_extracted': 'true', - # 'sample_type': 'type1', - # 'collection_timestamp': '2014-05-29 12:24:15', - # 'host_subject_id': 'NotIdentified', - # 'description': 'Test Sample 4', - # 'latitude': '42.42', - # 'longitude': '41.41', - # 'taxon_id': '9606', - # 'scientific_name': 'homo sapiens', - # 'tot_nitro': 'val4'}} - # for s_id in exp_sample_ids: - # self.assertEqual(st[s_id]._to_dict(), exp_dict[s_id]) - def test_to_dataframe(self): st = qdb.metadata_template.sample_template.SampleTemplate.create( self.metadata, self.new_study)