diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py index f0d50faae..15e0ee3b8 100644 --- a/qiita_db/metadata_template/test/test_prep_template.py +++ b/qiita_db/metadata_template/test/test_prep_template.py @@ -887,9 +887,7 @@ def test_create(self): def test_create_already_prefixed_samples(self): """Creates a new PrepTemplate""" fp_count = qdb.util.get_count('qiita.filepath') - pt = npt.assert_warns( - qdb.exceptions.QiitaDBWarning, - qdb.metadata_template.prep_template.PrepTemplate.create, + pt = qdb.metadata_template.prep_template.PrepTemplate.create( self.metadata_prefixed, self.test_study, self.data_type) self._common_creation_checks(pt, fp_count) diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py index 2cdc9a27a..9713c08a3 100644 --- a/qiita_db/metadata_template/test/test_sample_template.py +++ b/qiita_db/metadata_template/test/test_sample_template.py @@ -1084,9 +1084,7 @@ def test_create_str_prefixes(self): def test_create_already_prefixed_samples(self): """Creates a new SampleTemplate with the samples already prefixed""" - st = npt.assert_warns( - qdb.exceptions.QiitaDBWarning, - qdb.metadata_template.sample_template.SampleTemplate.create, + st = qdb.metadata_template.sample_template.SampleTemplate.create( self.metadata_prefixed, self.new_study) new_id = self.new_study.id # The returned object has the correct id diff --git a/qiita_db/metadata_template/test/test_util.py b/qiita_db/metadata_template/test/test_util.py index d76abbcbd..7de4dd3c2 100644 --- a/qiita_db/metadata_template/test/test_util.py +++ b/qiita_db/metadata_template/test/test_util.py @@ -8,6 +8,7 @@ from six import StringIO from unittest import TestCase, main +import warnings import numpy.testing as npt import pandas as pd @@ -36,12 +37,31 @@ def test_prefix_sample_names_with_id(self): } exp_df = pd.DataFrame.from_dict(exp_metadata_dict, orient='index', dtype=str) - qdb.metadata_template.util.prefix_sample_names_with_id( - self.metadata_map, 1) + with warnings.catch_warnings(record=True) as warn: + qdb.metadata_template.util.prefix_sample_names_with_id( + self.metadata_map, 1) + self.assertEqual(len(warn), 0) self.metadata_map.sort_index(inplace=True) exp_df.sort_index(inplace=True) assert_frame_equal(self.metadata_map, exp_df) + # test that it only prefixes the samples that are needed + metadata_dict = { + 'Sample1': {'int_col': 1, 'float_col': 2.1, 'str_col': 'str1'}, + '1.Sample2': {'int_col': 2, 'float_col': 3.1, 'str_col': '200'}, + 'Sample3': {'int_col': 3, 'float_col': 3, 'str_col': 'string30'}, + } + metadata_map = pd.DataFrame.from_dict( + metadata_dict, orient='index', dtype=str) + with warnings.catch_warnings(record=True) as warn: + qdb.metadata_template.util.prefix_sample_names_with_id( + metadata_map, 1) + self.assertEqual(len(warn), 1) + self.assertEqual(str(warn[0].message), 'Some of the samples were ' + 'already prefixed with the study id.') + metadata_map.sort_index(inplace=True) + assert_frame_equal(metadata_map, exp_df) + def test_load_template_to_dataframe(self): obs = qdb.metadata_template.util.load_template_to_dataframe( StringIO(EXP_SAMPLE_TEMPLATE)) diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py index 9a2e4d4f9..949c2d48d 100644 --- a/qiita_db/metadata_template/util.py +++ b/qiita_db/metadata_template/util.py @@ -34,29 +34,25 @@ def prefix_sample_names_with_id(md_template, study_id): study_id : int The study to which the metadata belongs to """ - # Get all the prefixes of the index, defined as any string before a '.' - prefixes = {idx.split('.', 1)[0] for idx in md_template.index} - # If the samples have been already prefixed with the study id, the prefixes - # set will contain only one element and it will be the str representation - # of the study id - if len(prefixes) == 1 and prefixes.pop() == str(study_id): - # The samples were already prefixed with the study id - warnings.warn("Sample names were already prefixed with the study id.", - qdb.exceptions.QiitaDBWarning) - else: - # Create a new pandas series in which all the values are the study_id - # and it is indexed as the metadata template - study_ids = pd.Series([str(study_id)] * len(md_template.index), - index=md_template.index) - # Create a new column on the metadata template that includes the - # metadata template indexes prefixed with the study id - md_template['sample_name_with_id'] = (study_ids + '.' + - md_template.index.values) - md_template.index = md_template.sample_name_with_id - del md_template['sample_name_with_id'] - # The original metadata template had the index column unnamed - remove - # the name of the index for consistency - md_template.index.name = None + # loop over the samples and prefix those that aren't prefixed + md_template['qiita_sample_name_with_id'] = pd.Series( + [idx if idx.split('.', 1)[0] == str(study_id) + else '%d.%s' % (study_id, idx) + for idx in md_template.index], index=md_template.index) + + # get the rows that are gonna change + changes = len(md_template.index[ + md_template['qiita_sample_name_with_id'] != md_template.index]) + if changes != 0 and changes != len(md_template.index): + warnings.warn( + "Some of the samples were already prefixed with the study id.", + qdb.exceptions.QiitaDBWarning) + + md_template.index = md_template.qiita_sample_name_with_id + del md_template['qiita_sample_name_with_id'] + # The original metadata template had the index column unnamed -> remove + # the name of the index for consistency + md_template.index.name = None def load_template_to_dataframe(fn, index='sample_name'): diff --git a/qiita_pet/handlers/api_proxy/tests/test_prep_template.py b/qiita_pet/handlers/api_proxy/tests/test_prep_template.py index edccba2b2..380a01294 100644 --- a/qiita_pet/handlers/api_proxy/tests/test_prep_template.py +++ b/qiita_pet/handlers/api_proxy/tests/test_prep_template.py @@ -380,7 +380,6 @@ def test_prep_template_post_req(self): '16S') exp = {'status': 'warning', 'message': [ - 'Sample names were already prefixed with the study id.', ('Some columns required to generate a QIIME-compliant ' 'mapping file are not present in the template. A ' 'placeholder value (XXQIITAXX) has been used to populate '