qiita-spots · josenavas · Jan 4, 2017 · Dec 27, 2016 · Dec 27, 2016 · Dec 27, 2016
diff --git a/qiita_db/handlers/tests/test_prep_template.py b/qiita_db/handlers/tests/test_prep_template.py
@@ -122,7 +122,8 @@ def test_get(self):
             'sequencing_meth': 'Sequencing by synthesis',
             'study_center': 'CCME',
             'target_gene': '16S rRNA',
-            'target_subfragment': 'V4'}
+            'target_subfragment': 'V4',
+            'qiita_prep_id': '1'}
         self.assertEqual(obs, exp)
 
 

diff --git a/qiita_db/metadata_template/base_metadata_template.py b/qiita_db/metadata_template/base_metadata_template.py
@@ -1062,6 +1062,10 @@ def to_dataframe(self):
             # Make sure that we are changing np.NaN by Nones
             df.where((pd.notnull(df)), None)
             df.set_index('sample_id', inplace=True, drop=True)
+            id_column_name = 'qiita_%sid' % (self._table_prefix)
+            if id_column_name == 'qiita_sample_id':
+                id_column_name = 'qiita_study_id'
+            df[id_column_name] = str(self.id)
 
             return df
 

diff --git a/qiita_db/metadata_template/test/test_prep_template.py b/qiita_db/metadata_template/test/test_prep_template.py
@@ -656,7 +656,7 @@ def test_to_dataframe(self):
             u'experiment_design_description', u'experiment_title', u'platform',
             u'instrument_model', u'samp_size', u'sequencing_meth',
             u'illumina_technology', u'sample_center', u'pcr_primers',
-            u'study_center'})
+            u'study_center', 'qiita_prep_id'})
 
     def test_clean_validate_template_error_bad_chars(self):
         """Raises an error if there are invalid characters in the sample names
@@ -1081,7 +1081,7 @@ def test_to_file(self):
         self._clean_up_files.append(fp)
         with open(fp, 'U') as f:
             obs = f.read()
-        self.assertEqual(obs, EXP_PREP_TEMPLATE)
+        self.assertEqual(obs, EXP_PREP_TEMPLATE.format(pt.id))
 
     def test_investigation_type_setter(self):
         """Able to update the investigation type"""
@@ -1494,15 +1494,15 @@ def test_delete_sample(self):
     'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
     'ebi_submission_accession\temp_status\texperiment_design_description\t'
     'instrument_model\tlibrary_construction_protocol\tplatform\tprimer\t'
-    'run_prefix\tstr_column\n'
+    'qiita_prep_id\trun_prefix\tstr_column\n'
     '1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t'
-    'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
+    'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
     's_G1_L002_sequences\tValue for sample 3\n'
     '1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t'
-    'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
+    'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
     's_G1_L001_sequences\tValue for sample 1\n'
     '1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t'
-    'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
+    'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
     's_G1_L001_sequences\tValue for sample 2\n')
 
 

diff --git a/qiita_db/metadata_template/test/test_sample_template.py b/qiita_db/metadata_template/test/test_sample_template.py
@@ -1806,8 +1806,9 @@ def test_to_dataframe(self):
             self.metadata, self.new_study)
         obs = st.to_dataframe()
 
+        new_id = self.new_study.id
         exp_dict = {
-            '%s.Sample1' % self.new_study.id: {
+            '%s.Sample1' % new_id: {
                 'physical_specimen_location': 'location1',
                 'physical_specimen_remaining': 'true',
                 'dna_extracted': 'true',
@@ -1819,8 +1820,9 @@ def test_to_dataframe(self):
                 'latitude': '42.42',
                 'longitude': '41.41',
                 'taxon_id': '9606',
+                'qiita_study_id': str(new_id),
                 'scientific_name': 'homo sapiens'},
-            '%s.Sample2' % self.new_study.id: {
+            '%s.Sample2' % new_id: {
                 'physical_specimen_location': 'location1',
                 'physical_specimen_remaining': 'true',
                 'dna_extracted': 'true',
@@ -1832,8 +1834,9 @@ def test_to_dataframe(self):
                 'latitude': '4.2',
                 'longitude': '1.1',
                 'taxon_id': '9606',
+                'qiita_study_id': str(new_id),
                 'scientific_name': 'homo sapiens'},
-            '%s.Sample3' % self.new_study.id: {
+            '%s.Sample3' % new_id: {
                 'physical_specimen_location': 'location1',
                 'physical_specimen_remaining': 'true',
                 'dna_extracted': 'true',
@@ -1845,6 +1848,7 @@ def test_to_dataframe(self):
                 'latitude': '4.8',
                 'longitude': '4.41',
                 'taxon_id': '9606',
+                'qiita_study_id': str(new_id),
                 'scientific_name': 'homo sapiens'},
             }
         exp = pd.DataFrame.from_dict(exp_dict, orient='index', dtype=str)
@@ -1881,7 +1885,7 @@ def test_to_dataframe(self):
             'water_content_soil', 'elevation', 'temp', 'tot_nitro',
             'samp_salinity', 'altitude', 'env_biome', 'country', 'ph',
             'anonymized_name', 'tot_org_carb', 'description_duplicate',
-            'env_feature', 'scientific_name'})
+            'env_feature', 'scientific_name', 'qiita_study_id'})
 
     def test_check_restrictions(self):
         obs = self.tester.check_restrictions(
@@ -2166,22 +2170,24 @@ def test_delete_sample(self):
 EXP_SAMPLE_TEMPLATE = (
     "sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
     "host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
-    "physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
+    "physical_specimen_remaining\tqiita_study_id\tsample_type\t"
+    "scientific_name\ttaxon_id\n"
     "{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
-    "42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
+    "42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
     "{0}.Sample2\t05/29/2014 12:24:15\tTest Sample 2\ttrue\tNotIdentified\t"
-    "4.2\t1.1\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
+    "4.2\t1.1\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
     "{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
-    "4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
+    "4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")
 
 EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES = (
     "sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
     "host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
-    "physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
+    "physical_specimen_remaining\tqiita_study_id\tsample_type\t"
+    "scientific_name\ttaxon_id\n"
     "{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
-    "42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
+    "42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
     "{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
-    "4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
+    "4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")
 
 
 if __name__ == '__main__':

diff --git a/qiita_db/metadata_template/util.py b/qiita_db/metadata_template/util.py
@@ -262,7 +262,10 @@ def validate_invalid_column_names(column_names):
     # tests.
     forbidden_values = {
         # https://github.com/biocore/qiita/issues/2026
-        'sampleid'
+        'sampleid',
+        # https://github.com/biocore/qiita/issues/1866
+        'qiita_study_id',
+        'qiita_prep_id'
     }
     forbidden = forbidden_values & column_names
 

diff --git a/qiita_db/support_files/test_data/analysis/1_analysis_mapping_exp.txt b/qiita_db/support_files/test_data/analysis/1_analysis_mapping_exp.txt
@@ -1,4 +1,4 @@
-#SampleID	BarcodeSequence	LinkerPrimerSequence	center_name	center_project_name	emp_status	experiment_center	experiment_design_description	experiment_title	illumina_technology	instrument_model	library_construction_protocol	pcr_primers	platform	run_center	run_date	run_prefix	samp_size	sample_center	sequencing_meth	study_center	target_gene	target_subfragment	altitude	anonymized_name	assigned_from_geo	collection_timestamp	common_name	country	depth	description_duplicate	elevation	env_biome	env_feature	has_extracted_data	has_physical_specimen	host_subject_id	host_taxid	latitude	longitude	ph	physical_location	samp_salinity	sample_type	season_environment	taxon_id	temp	texture	tot_nitro	tot_org_carb	water_content_soil	qiita_study_title	qiita_study_alias	qiita_owner	qiita_principal_investigator	Description
-1.SKB8.640193	AGCGCTCACATC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	Illumina MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKB8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M7	3483	74.0894932572	65.3283470202	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.164	Identification of the Microbiomes for Cannabis Soils	Cannabis Soils	Dude	PIDude	Cannabis Soil Microbiome
-1.SKD8.640184	TGAGTGGTCTGT	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	Illumina MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKD8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Diesel Root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:D9	3483	57.571893782	32.5563076447	6.8	ANL	7.1	ENVO:soil	winter	1118232	15.0	66 sand, 16.3 silt, 17.7 clay	1.51	4.32	0.178	Identification of the Microbiomes for Cannabis Soils	Cannabis Soils	Dude	PIDude	Cannabis Soil Microbiome
-1.SKB7.640196	CGGCCTAAGTTC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	Illumina MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	0.0	SKB7	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M8	3483	13.089194595	92.5274472082	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.164	Identification of the Microbiomes for Cannabis Soils	Cannabis Soils	Dude	PIDude	Cannabis Soil Microbiome
+#SampleID	BarcodeSequence	LinkerPrimerSequence	center_name	center_project_name	emp_status	experiment_center	experiment_design_description	experiment_title	illumina_technology	instrument_model	library_construction_protocol	pcr_primers	platform	run_center	run_date	run_prefix	samp_size	sample_center	sequencing_meth	study_center	target_gene	target_subfragment	qiita_prep_id	altitude	anonymized_name	assigned_from_geo	collection_timestamp	common_name	country	depth	description_duplicate	elevation	env_biome	env_feature	has_extracted_data	has_physical_specimen	host_subject_id	host_taxid	latitude	longitude	ph	physical_location	samp_salinity	sample_type	season_environment	taxon_id	temp	texture	tot_nitro	tot_org_carb	water_content_soil	qiita_study_title	qiita_study_alias	qiita_owner	qiita_principal_investigator	Description
+1.SKB8.640193	AGCGCTCACATC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	Illumina MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	1	0.0	SKB8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M7	3483	74.0894932572	65.3283470202	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.164	Identification of the Microbiomes for Cannabis Soils	Cannabis Soils	Dude	PIDude	Cannabis Soil Microbiome
+1.SKD8.640184	TGAGTGGTCTGT	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	Illumina MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	1	0.0	SKD8	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Diesel Root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:D9	3483	57.571893782	32.5563076447	6.8	ANL	7.1	ENVO:soil	winter	1118232	15.0	66 sand, 16.3 silt, 17.7 clay	1.51	4.32	0.178	Identification of the Microbiomes for Cannabis Soils	Cannabis Soils	Dude	PIDude	Cannabis Soil Microbiome
+1.SKB7.640196	CGGCCTAAGTTC	GTGCCAGCMGCCGCGGTAA	ANL		EMP	ANL	micro biome of soil and rhizosphere of cannabis plants from CA	Cannabis Soil Microbiome	MiSeq	Illumina MiSeq	This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions.	FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT	Illumina	ANL	8/1/12	s_G1_L001_sequences	.25,g	ANL	Sequencing by synthesis	CCME	16S rRNA	V4	1	0.0	SKB7	n	2011-11-11 13:00:00	root metagenome	GAZ:United States of America	0.15	Burmese root	114.0	ENVO:Temperate grasslands, savannas, and shrubland biome	ENVO:plant-associated habitat	True	True	1001:M8	3483	13.089194595	92.5274472082	6.94	ANL	7.15	ENVO:soil	winter	1118232	15.0	64.6 sand, 17.6 silt, 17.8 clay	1.41	5.0	0.164	Identification of the Microbiomes for Cannabis Soils	Cannabis Soils	Dude	PIDude	Cannabis Soil Microbiome