Skip to content

Commit 58daf17

Browse files
authored
Merge pull request #2033 from antgonza/fix-1866
Fix 1866
2 parents 9c474c8 + bcb3c2f commit 58daf17

File tree

10 files changed, with 77 additions and 59 deletions.

qiita_db/handlers/tests/test_prep_template.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ def test_get(self):
122122
'sequencing_meth': 'Sequencing by synthesis',
123123
'study_center': 'CCME',
124124
'target_gene': '16S rRNA',
125-
'target_subfragment': 'V4'}
125+
'target_subfragment': 'V4',
126+
'qiita_prep_id': '1'}
126127
self.assertEqual(obs, exp)
127128

128129

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,10 @@ def to_dataframe(self):
10621062
# Make sure that we are changing np.NaN by Nones
10631063
df.where((pd.notnull(df)), None)
10641064
df.set_index('sample_id', inplace=True, drop=True)
1065+
id_column_name = 'qiita_%sid' % (self._table_prefix)
1066+
if id_column_name == 'qiita_sample_id':
1067+
id_column_name = 'qiita_study_id'
1068+
df[id_column_name] = str(self.id)
10651069

10661070
return df
10671071

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ def test_to_dataframe(self):
656656
u'experiment_design_description', u'experiment_title', u'platform',
657657
u'instrument_model', u'samp_size', u'sequencing_meth',
658658
u'illumina_technology', u'sample_center', u'pcr_primers',
659-
u'study_center'})
659+
u'study_center', 'qiita_prep_id'})
660660

661661
def test_clean_validate_template_error_bad_chars(self):
662662
"""Raises an error if there are invalid characters in the sample names
@@ -1081,7 +1081,7 @@ def test_to_file(self):
10811081
self._clean_up_files.append(fp)
10821082
with open(fp, 'U') as f:
10831083
obs = f.read()
1084-
self.assertEqual(obs, EXP_PREP_TEMPLATE)
1084+
self.assertEqual(obs, EXP_PREP_TEMPLATE.format(pt.id))
10851085

10861086
def test_investigation_type_setter(self):
10871087
"""Able to update the investigation type"""
@@ -1494,15 +1494,15 @@ def test_delete_sample(self):
14941494
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
14951495
'ebi_submission_accession\temp_status\texperiment_design_description\t'
14961496
'instrument_model\tlibrary_construction_protocol\tplatform\tprimer\t'
1497-
'run_prefix\tstr_column\n'
1497+
'qiita_prep_id\trun_prefix\tstr_column\n'
14981498
'1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t'
1499-
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
1499+
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
15001500
's_G1_L002_sequences\tValue for sample 3\n'
15011501
'1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t'
1502-
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
1502+
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
15031503
's_G1_L001_sequences\tValue for sample 1\n'
15041504
'1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t'
1505-
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
1505+
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
15061506
's_G1_L001_sequences\tValue for sample 2\n')
15071507

15081508

qiita_db/metadata_template/test/test_sample_template.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,8 +1806,9 @@ def test_to_dataframe(self):
18061806
self.metadata, self.new_study)
18071807
obs = st.to_dataframe()
18081808

1809+
new_id = self.new_study.id
18091810
exp_dict = {
1810-
'%s.Sample1' % self.new_study.id: {
1811+
'%s.Sample1' % new_id: {
18111812
'physical_specimen_location': 'location1',
18121813
'physical_specimen_remaining': 'true',
18131814
'dna_extracted': 'true',
@@ -1819,8 +1820,9 @@ def test_to_dataframe(self):
18191820
'latitude': '42.42',
18201821
'longitude': '41.41',
18211822
'taxon_id': '9606',
1823+
'qiita_study_id': str(new_id),
18221824
'scientific_name': 'homo sapiens'},
1823-
'%s.Sample2' % self.new_study.id: {
1825+
'%s.Sample2' % new_id: {
18241826
'physical_specimen_location': 'location1',
18251827
'physical_specimen_remaining': 'true',
18261828
'dna_extracted': 'true',
@@ -1832,8 +1834,9 @@ def test_to_dataframe(self):
18321834
'latitude': '4.2',
18331835
'longitude': '1.1',
18341836
'taxon_id': '9606',
1837+
'qiita_study_id': str(new_id),
18351838
'scientific_name': 'homo sapiens'},
1836-
'%s.Sample3' % self.new_study.id: {
1839+
'%s.Sample3' % new_id: {
18371840
'physical_specimen_location': 'location1',
18381841
'physical_specimen_remaining': 'true',
18391842
'dna_extracted': 'true',
@@ -1845,6 +1848,7 @@ def test_to_dataframe(self):
18451848
'latitude': '4.8',
18461849
'longitude': '4.41',
18471850
'taxon_id': '9606',
1851+
'qiita_study_id': str(new_id),
18481852
'scientific_name': 'homo sapiens'},
18491853
}
18501854
exp = pd.DataFrame.from_dict(exp_dict, orient='index', dtype=str)
@@ -1881,7 +1885,7 @@ def test_to_dataframe(self):
18811885
'water_content_soil', 'elevation', 'temp', 'tot_nitro',
18821886
'samp_salinity', 'altitude', 'env_biome', 'country', 'ph',
18831887
'anonymized_name', 'tot_org_carb', 'description_duplicate',
1884-
'env_feature', 'scientific_name'})
1888+
'env_feature', 'scientific_name', 'qiita_study_id'})
18851889

18861890
def test_check_restrictions(self):
18871891
obs = self.tester.check_restrictions(
@@ -2166,22 +2170,24 @@ def test_delete_sample(self):
21662170
EXP_SAMPLE_TEMPLATE = (
21672171
"sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
21682172
"host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
2169-
"physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
2173+
"physical_specimen_remaining\tqiita_study_id\tsample_type\t"
2174+
"scientific_name\ttaxon_id\n"
21702175
"{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
2171-
"42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
2176+
"42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
21722177
"{0}.Sample2\t05/29/2014 12:24:15\tTest Sample 2\ttrue\tNotIdentified\t"
2173-
"4.2\t1.1\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
2178+
"4.2\t1.1\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
21742179
"{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
2175-
"4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
2180+
"4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")
21762181

21772182
EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES = (
21782183
"sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
21792184
"host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
2180-
"physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
2185+
"physical_specimen_remaining\tqiita_study_id\tsample_type\t"
2186+
"scientific_name\ttaxon_id\n"
21812187
"{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
2182-
"42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
2188+
"42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
21832189
"{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
2184-
"4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
2190+
"4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")
21852191

21862192

21872193
if __name__ == '__main__':

qiita_db/metadata_template/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,10 @@ def validate_invalid_column_names(column_names):
262262
# tests.
263263
forbidden_values = {
264264
# https://github.com/biocore/qiita/issues/2026
265-
'sampleid'
265+
'sampleid',
266+
# https://github.com/biocore/qiita/issues/1866
267+
'qiita_study_id',
268+
'qiita_prep_id'
266269
}
267270
forbidden = forbidden_values & column_names
268271

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#SampleID BarcodeSequence LinkerPrimerSequence center_name center_project_name emp_status experiment_center experiment_design_description experiment_title illumina_technology instrument_model library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate elevation env_biome env_feature has_extracted_data has_physical_specimen host_subject_id host_taxid latitude longitude ph physical_location samp_salinity sample_type season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil qiita_study_title qiita_study_alias qiita_owner qiita_principal_investigator Description
2-
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
3-
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
4-
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
1+
#SampleID BarcodeSequence LinkerPrimerSequence center_name center_project_name emp_status experiment_center experiment_design_description experiment_title illumina_technology instrument_model library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment qiita_prep_id altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate elevation env_biome env_feature has_extracted_data has_physical_specimen host_subject_id host_taxid latitude longitude ph physical_location samp_salinity sample_type season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil qiita_study_title qiita_study_alias qiita_owner qiita_principal_investigator Description
2+
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
3+
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
4+
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome

0 commit comments