Skip to content

Fix 1866 #2033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion qiita_db/handlers/tests/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def test_get(self):
'sequencing_meth': 'Sequencing by synthesis',
'study_center': 'CCME',
'target_gene': '16S rRNA',
'target_subfragment': 'V4'}
'target_subfragment': 'V4',
'qiita_prep_id': '1'}
self.assertEqual(obs, exp)


Expand Down
4 changes: 4 additions & 0 deletions qiita_db/metadata_template/base_metadata_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,10 @@ def to_dataframe(self):
# Make sure that we are replacing np.NaN values with None
df.where((pd.notnull(df)), None)
df.set_index('sample_id', inplace=True, drop=True)
id_column_name = 'qiita_%sid' % (self._table_prefix)
if id_column_name == 'qiita_sample_id':
id_column_name = 'qiita_study_id'
df[id_column_name] = str(self.id)

return df

Expand Down
12 changes: 6 additions & 6 deletions qiita_db/metadata_template/test/test_prep_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def test_to_dataframe(self):
u'experiment_design_description', u'experiment_title', u'platform',
u'instrument_model', u'samp_size', u'sequencing_meth',
u'illumina_technology', u'sample_center', u'pcr_primers',
u'study_center'})
u'study_center', 'qiita_prep_id'})

def test_clean_validate_template_error_bad_chars(self):
"""Raises an error if there are invalid characters in the sample names
Expand Down Expand Up @@ -1081,7 +1081,7 @@ def test_to_file(self):
self._clean_up_files.append(fp)
with open(fp, 'U') as f:
obs = f.read()
self.assertEqual(obs, EXP_PREP_TEMPLATE)
self.assertEqual(obs, EXP_PREP_TEMPLATE.format(pt.id))

def test_investigation_type_setter(self):
"""Able to update the investigation type"""
Expand Down Expand Up @@ -1494,15 +1494,15 @@ def test_delete_sample(self):
'sample_name\tbarcode\tcenter_name\tcenter_project_name\t'
'ebi_submission_accession\temp_status\texperiment_design_description\t'
'instrument_model\tlibrary_construction_protocol\tplatform\tprimer\t'
'run_prefix\tstr_column\n'
'qiita_prep_id\trun_prefix\tstr_column\n'
'1.SKB7.640196\tCCTCTGAGAGCT\tANL\tTest Project\t\tEMP\tBBBB\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
's_G1_L002_sequences\tValue for sample 3\n'
'1.SKB8.640193\tGTCCGCAAGTTA\tANL\tTest Project\t\tEMP\tBBBB\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
's_G1_L001_sequences\tValue for sample 1\n'
'1.SKD8.640184\tCGTAGAGCTCTC\tANL\tTest Project\t\tEMP\tBBBB\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t'
'Illumina MiSeq\tAAAA\tILLUMINA\tGTGCCAGCMGCCGCGGTAA\t{0}\t'
's_G1_L001_sequences\tValue for sample 2\n')


Expand Down
28 changes: 17 additions & 11 deletions qiita_db/metadata_template/test/test_sample_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -1806,8 +1806,9 @@ def test_to_dataframe(self):
self.metadata, self.new_study)
obs = st.to_dataframe()

new_id = self.new_study.id
exp_dict = {
'%s.Sample1' % self.new_study.id: {
'%s.Sample1' % new_id: {
'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
'dna_extracted': 'true',
Expand All @@ -1819,8 +1820,9 @@ def test_to_dataframe(self):
'latitude': '42.42',
'longitude': '41.41',
'taxon_id': '9606',
'qiita_study_id': str(new_id),
'scientific_name': 'homo sapiens'},
'%s.Sample2' % self.new_study.id: {
'%s.Sample2' % new_id: {
'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
'dna_extracted': 'true',
Expand All @@ -1832,8 +1834,9 @@ def test_to_dataframe(self):
'latitude': '4.2',
'longitude': '1.1',
'taxon_id': '9606',
'qiita_study_id': str(new_id),
'scientific_name': 'homo sapiens'},
'%s.Sample3' % self.new_study.id: {
'%s.Sample3' % new_id: {
'physical_specimen_location': 'location1',
'physical_specimen_remaining': 'true',
'dna_extracted': 'true',
Expand All @@ -1845,6 +1848,7 @@ def test_to_dataframe(self):
'latitude': '4.8',
'longitude': '4.41',
'taxon_id': '9606',
'qiita_study_id': str(new_id),
'scientific_name': 'homo sapiens'},
}
exp = pd.DataFrame.from_dict(exp_dict, orient='index', dtype=str)
Expand Down Expand Up @@ -1881,7 +1885,7 @@ def test_to_dataframe(self):
'water_content_soil', 'elevation', 'temp', 'tot_nitro',
'samp_salinity', 'altitude', 'env_biome', 'country', 'ph',
'anonymized_name', 'tot_org_carb', 'description_duplicate',
'env_feature', 'scientific_name'})
'env_feature', 'scientific_name', 'qiita_study_id'})

def test_check_restrictions(self):
obs = self.tester.check_restrictions(
Expand Down Expand Up @@ -2166,22 +2170,24 @@ def test_delete_sample(self):
EXP_SAMPLE_TEMPLATE = (
"sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
"host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
"physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
"physical_specimen_remaining\tqiita_study_id\tsample_type\t"
"scientific_name\ttaxon_id\n"
"{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
"42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
"42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
"{0}.Sample2\t05/29/2014 12:24:15\tTest Sample 2\ttrue\tNotIdentified\t"
"4.2\t1.1\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
"4.2\t1.1\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
"{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
"4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
"4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")

EXP_SAMPLE_TEMPLATE_FEWER_SAMPLES = (
"sample_name\tcollection_timestamp\tdescription\tdna_extracted\t"
"host_subject_id\tlatitude\tlongitude\tphysical_specimen_location\t"
"physical_specimen_remaining\tsample_type\tscientific_name\ttaxon_id\n"
"physical_specimen_remaining\tqiita_study_id\tsample_type\t"
"scientific_name\ttaxon_id\n"
"{0}.Sample1\t05/29/2014 12:24:15\tTest Sample 1\ttrue\tNotIdentified\t"
"42.42\t41.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n"
"42.42\t41.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n"
"{0}.Sample3\t05/29/2014 12:24:15\tTest Sample 3\ttrue\tNotIdentified\t"
"4.8\t4.41\tlocation1\ttrue\ttype1\thomo sapiens\t9606\n")
"4.8\t4.41\tlocation1\ttrue\t{0}\ttype1\thomo sapiens\t9606\n")


if __name__ == '__main__':
Expand Down
5 changes: 4 additions & 1 deletion qiita_db/metadata_template/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,10 @@ def validate_invalid_column_names(column_names):
# tests.
forbidden_values = {
# https://github.com/biocore/qiita/issues/2026
'sampleid'
'sampleid',
# https://github.com/biocore/qiita/issues/1866
'qiita_study_id',
'qiita_prep_id'
}
forbidden = forbidden_values & column_names

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#SampleID BarcodeSequence LinkerPrimerSequence center_name center_project_name emp_status experiment_center experiment_design_description experiment_title illumina_technology instrument_model library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate elevation env_biome env_feature has_extracted_data has_physical_specimen host_subject_id host_taxid latitude longitude ph physical_location samp_salinity sample_type season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil qiita_study_title qiita_study_alias qiita_owner qiita_principal_investigator Description
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
#SampleID BarcodeSequence LinkerPrimerSequence center_name center_project_name emp_status experiment_center experiment_design_description experiment_title illumina_technology instrument_model library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment qiita_prep_id altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate elevation env_biome env_feature has_extracted_data has_physical_specimen host_subject_id host_taxid latitude longitude ph physical_location samp_salinity sample_type season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil qiita_study_title qiita_study_alias qiita_owner qiita_principal_investigator Description
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
Loading