Skip to content

Commit aa68a21

Browse files
authored
Merge pull request #2075 from antgonza/fix-timestamp
Fix timestamp / -> -
2 parents 4bf4808 + f4835d5 commit aa68a21

File tree

19 files changed

+460
-284
lines changed

19 files changed

+460
-284
lines changed

qiita_db/handlers/tests/test_prep_template.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,18 @@ def test_get(self):
4646
path_builder = partial(join, db_test_template_dir)
4747

4848
obs = loads(obs.body)
49-
exp = {'data_type': '18S',
50-
'artifact': 1,
51-
'investigation_type': 'Metagenomics',
52-
'study': 1,
53-
'status': 'private',
54-
'qiime-map': path_builder('1_prep_1_qiime_19700101-000000.txt'),
55-
'prep-file': path_builder('1_prep_1_19700101-000000.txt')}
56-
self.assertEqual(obs, exp)
49+
50+
# have to check per key because, as of patch 51, we are updating the
51+
# test info files
52+
self.assertEqual(obs['data_type'], '18S')
53+
self.assertEqual(obs['artifact'], 1)
54+
self.assertEqual(obs['investigation_type'], 'Metagenomics')
55+
self.assertEqual(obs['study'], 1)
56+
self.assertEqual(obs['status'], 'private')
57+
self.assertTrue(obs['qiime-map'].startswith(
58+
path_builder('1_prep_1_qiime_')))
59+
self.assertTrue(obs['prep-file'].startswith(
60+
path_builder('1_prep_1_')))
5761

5862

5963
class PrepTemplateDataHandlerTests(OauthTestingBase):

qiita_db/metadata_template/base_metadata_template.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,7 +1430,11 @@ def validate(self, restriction_dict):
14301430
else:
14311431
valid_null = qdb.metadata_template.constants.EBI_NULL_VALUES
14321432
for column, datatype in viewitems(restriction.columns):
1433-
for sample, val in viewitems(self.get_category(column)):
1433+
# sorting by key (sample id) so we always check in the
1434+
# same order, helpful for testing
1435+
cats_by_column = self.get_category(column)
1436+
for sample in sorted(cats_by_column):
1437+
val = cats_by_column[sample]
14341438
# ignore if valid null value
14351439
if val in valid_null:
14361440
continue
@@ -1439,11 +1443,8 @@ def validate(self, restriction_dict):
14391443
val = str(val)
14401444
formats = [
14411445
# 4 digits year
1442-
'%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H:%M',
1443-
'%m/%d/%Y %H', '%m/%d/%Y', '%m/%Y', '%Y',
1444-
# 2 digits year
1445-
'%m/%d/%y %H:%M:%S', '%m/%d/%y %H:%M',
1446-
'%m/%d/%y %H', '%m/%d/%y', '%m/%y', '%y'
1446+
'%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M',
1447+
'%Y-%m-%d %H', '%Y-%m-%d', '%Y-%m', '%Y'
14471448
]
14481449
date = None
14491450
for fmt in formats:

qiita_db/metadata_template/test/test_prep_template.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,8 @@ def test_create_qiime_mapping_file(self):
917917
exp = pd.read_csv(
918918
exp_fp, sep='\t', infer_datetime_format=False,
919919
parse_dates=False, index_col=False, comment='\t')
920+
obs = obs.reindex_axis(sorted(obs.columns), axis=1)
921+
exp = exp.reindex_axis(sorted(exp.columns), axis=1)
920922

921923
assert_frame_equal(obs, exp)
922924

qiita_db/metadata_template/test/test_sample_template.py

Lines changed: 102 additions & 78 deletions
Large diffs are not rendered by default.

qiita_db/metadata_template/test/test_util.py

Lines changed: 67 additions & 67 deletions
Large diffs are not rendered by default.

qiita_db/support_files/patches/51.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Feb 9, 2017
2+
-- changing format of stored timestamps
3+
-- see python patch
4+
5+
SELECT 1;
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
from future.utils import viewitems
2+
from datetime import datetime
3+
4+
from qiita_db.metadata_template.constants import (
5+
SAMPLE_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS,
6+
PREP_TEMPLATE_COLUMNS_TARGET_GENE)
7+
from qiita_db.metadata_template.prep_template import PrepTemplate
8+
from qiita_db.metadata_template.sample_template import SampleTemplate
9+
from qiita_db.sql_connection import TRN
10+
11+
12+
# getting columns in each info file that we need to check for
13+
# Names of the columns that the sample info restrictions declare as datetime.
cols_sample = [col
               for key, vals in viewitems(SAMPLE_TEMPLATE_COLUMNS)
               for col, dt in viewitems(vals.columns) if dt == datetime]
# Same for the prep info files, gathered from both the generic and the target
# gene restrictions. The two lists are joined with `+`: list.extend() mutates
# in place and returns None, which would bind cols_prep to None and make the
# `if cols_prep:` section of this patch a no-op. Both comprehensions keep
# only datetime columns, since those are the only values to reformat.
cols_prep = [col
             for key, vals in viewitems(PREP_TEMPLATE_COLUMNS)
             for col, dt in viewitems(vals.columns) if dt == datetime] + [
    col
    for key, vals in viewitems(PREP_TEMPLATE_COLUMNS_TARGET_GENE)
    for col, dt in viewitems(vals.columns) if dt == datetime]
22+
23+
24+
def transform_date(value):
    """Rewrite a month-first timestamp string as a year-first one.

    Old values look like ``11/11/2011 13:00:00`` (month, day, year, with a
    4 or 2 digit year and an optional, possibly partial, time). They are
    returned in the matching new layout, e.g. ``2011-11-11 13:00:00``.
    Both ``/`` and ``-`` are accepted as the old date separator.

    Parameters
    ----------
    value : str
        The stored timestamp.

    Returns
    -------
    str
        The reformatted timestamp, or ``value`` unchanged when it does not
        match any of the old layouts (e.g. null placeholders or values that
        are already in the new format).
    """
    # for the way the patches are applied we need to have this import and
    # the format table within this function
    from datetime import datetime

    # (old layout, new layout), from most to least precise; {sep} and {year}
    # are filled in below so every layout is tried with both separators and
    # with 4 and 2 digit years
    layouts = (
        ('%m{sep}%d{sep}{year} %H:%M:%S', '%Y-%m-%d %H:%M:%S'),
        ('%m{sep}%d{sep}{year} %H:%M', '%Y-%m-%d %H:%M'),
        ('%m{sep}%d{sep}{year} %H', '%Y-%m-%d %H'),
        ('%m{sep}%d{sep}{year}', '%Y-%m-%d'),
        ('%m{sep}{year}', '%Y-%m'),
        ('{year}', '%Y'),
    )

    for year in ('%Y', '%y'):
        for sep in ('/', '-'):
            for old, new in layouts:
                old_fmt = old.format(sep=sep, year=year)
                try:
                    parsed = datetime.strptime(value, old_fmt)
                except ValueError:
                    # not this layout, keep looking
                    continue
                return parsed.strftime(new)

    # nothing matched: leave the value as it was
    return value
58+
59+
if cols_sample:
    with TRN:
        # list every table whose name matches 'sample_%%' and that holds at
        # least one of the datetime columns, together with those columns
        # NOTE(review): the NOT IN clause looks like a leftover here; neither
        # name can match the LIKE 'sample_%%' filter
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'sample_%%'
                        AND table_name NOT IN (
                            'prep_template', 'prep_template_sample')
                    GROUP BY table_name"""
        # set() drops column names that appear more than once
        TRN.add(sql, [tuple(set(cols_sample))])
        columns_by_table = dict(TRN.execute_fetchindex())
        for table, columns in viewitems(columns_by_table):
            # table names have the format table_#, so [1] is the #
            template_id = int(table.split('_')[1])
            template = SampleTemplate(template_id)
            # keep only the columns that need reformatting
            frame = template.to_dataframe()[columns]
            # rewrite every value of those columns in the new format
            for col in columns:
                frame[col] = frame[col].apply(transform_date)
            template.update(frame)
83+
84+
if cols_prep:
    with TRN:
        # list every table whose name matches 'prep_%%' and that holds at
        # least one of the datetime columns, together with those columns;
        # 'prep_template' and 'prep_template_sample' are excluded by name
        sql = """SELECT table_name, array_agg(column_name::text)
                    FROM information_schema.columns
                    WHERE column_name IN %s
                        AND table_name LIKE 'prep_%%'
                        AND table_name NOT IN (
                            'prep_template', 'prep_template_sample')
                    GROUP BY table_name"""
        # set() drops column names that appear more than once
        TRN.add(sql, [tuple(set(cols_prep))])
        columns_by_table = dict(TRN.execute_fetchindex())
        for table, columns in viewitems(columns_by_table):
            # table names have the format table_#, so [1] is the #
            template_id = int(table.split('_')[1])
            template = PrepTemplate(template_id)
            # keep only the columns that need reformatting
            frame = template.to_dataframe()[columns]
            # rewrite every value of those columns in the new format
            for col in columns:
                frame[col] = frame[col].apply(transform_date)
            template.update(frame)
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#SampleID BarcodeSequence LinkerPrimerSequence center_name center_project_name emp_status experiment_center experiment_design_description experiment_title illumina_technology instrument_model library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment qiita_prep_id altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate elevation env_biome env_feature has_extracted_data has_physical_specimen host_subject_id host_taxid latitude longitude ph physical_location samp_salinity sample_type season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil qiita_study_title qiita_study_alias qiita_owner qiita_principal_investigator Description
2-
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
3-
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL 7.1 ENVO:soil winter 1118232 15.0 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
4-
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0.0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root 114.0 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat True True 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL 7.15 ENVO:soil winter 1118232 15.0 64.6 sand, 17.6 silt, 17.8 clay 1.41 5.0 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
1+
#SampleID BarcodeSequence LinkerPrimerSequence center_name emp_status experiment_center experiment_design_description experiment_title illumina_technology instrument_model library_construction_protocol pcr_primers platform run_center run_date run_prefix samp_size sample_center sequencing_meth study_center target_gene target_subfragment qiita_prep_id altitude anonymized_name assigned_from_geo collection_timestamp common_name country depth description_duplicate dna_extracted elevation env_biome env_feature host_subject_id host_taxid latitude longitude ph physical_specimen_location physical_specimen_remaining qiita_study_id samp_salinity sample_type scientific_name season_environment taxon_id temp texture tot_nitro tot_org_carb water_content_soil qiita_study_title qiita_study_alias qiita_owner qiita_principal_investigator Description
2+
1.SKB8.640193 AGCGCTCACATC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0 SKB8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root true 114 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat 1001:M7 3483 74.0894932572 65.3283470202 6.94 ANL true 1 7.15 ENVO:soil 1118232 winter 1118232 15 64.6 sand, 17.6 silt, 17.8 clay 1.41 5 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
3+
1.SKD8.640184 TGAGTGGTCTGT GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0 SKD8 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Diesel Root true 114 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat 1001:D9 3483 57.571893782 32.5563076447 6.8 ANL true 1 7.1 ENVO:soil 1118232 winter 1118232 15 66 sand, 16.3 silt, 17.7 clay 1.51 4.32 0.178 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome
4+
1.SKB7.640196 CGGCCTAAGTTC GTGCCAGCMGCCGCGGTAA ANL EMP ANL micro biome of soil and rhizosphere of cannabis plants from CA Cannabis Soil Microbiome MiSeq Illumina MiSeq This analysis was done as in Caporaso et al 2011 Genome research. The PCR primers (F515/R806) were developed against the V4 region of the 16S rRNA (both bacteria and archaea), which we determined would yield optimal community clustering with reads of this length using a procedure similar to that of ref. 15. [For reference, this primer pair amplifies the region 533_786 in the Escherichia coli strain 83972 sequence (greengenes accession no. prokMSA_id:470367).] The reverse PCR primer is barcoded with a 12-base error-correcting Golay code to facilitate multiplexing of up to 1,500 samples per lane, and both PCR primers contain sequencer adapter regions. FWD:GTGCCAGCMGCCGCGGTAA; REV:GGACTACHVGGGTWTCTAAT Illumina ANL 8/1/12 s_G1_L001_sequences .25,g ANL Sequencing by synthesis CCME 16S rRNA V4 1 0 SKB7 n 2011-11-11 13:00:00 root metagenome GAZ:United States of America 0.15 Burmese root true 114 ENVO:Temperate grasslands, savannas, and shrubland biome ENVO:plant-associated habitat 1001:M8 3483 13.089194595 92.5274472082 6.94 ANL true 1 7.15 ENVO:soil 1118232 winter 1118232 15 64.6 sand, 17.6 silt, 17.8 clay 1.41 5 0.164 Identification of the Microbiomes for Cannabis Soils Cannabis Soils Dude PIDude Cannabis Soil Microbiome

0 commit comments

Comments
 (0)