Skip to content

Commit e177200

Browse files
authored
fix #3076 (#3077)
* fix #3076 * addressing @ElDeveloper comments
1 parent 6de1e1d commit e177200

File tree

4 files changed

+1676
-1689
lines changed

4 files changed

+1676
-1689
lines changed

qiita_db/metadata_template/test/test_util.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,15 @@ def test_load_template_to_dataframe_with_nulls(self):
265265
exp.index.name = 'sample_name'
266266
assert_frame_equal(obs, exp, check_like=True)
267267

268+
def test_load_template_to_dataframe_better_tokenizing_error_msg(self):
269+
with self.assertRaisesRegex(RuntimeError, 'Your file has more columns '
270+
'with values than headers'):
271+
qdb.metadata_template.util.load_template_to_dataframe(
272+
StringIO('sample_name\tcollection_timestamp\n'
273+
'2.Sample1\t2014-05-29 12:24:51\t\n'
274+
'2.Sample2\taaa\n'
275+
'xxx\tadfa\t\t\n'))
276+
268277
def test_get_invalid_sample_names(self):
269278
all_valid = ['2.sample.1', 'foo.bar.baz', 'roses', 'are', 'red',
270279
'v10l3t5', '4r3', '81u3']

qiita_db/metadata_template/util.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ def load_template_to_dataframe(fn, index='sample_name'):
8888
the database
8989
9090
Everything in the DataFrame will be read and managed as string
91+
92+
While reading the file via pandas, it's possible that it will raise a
93+
'tokenizing' pd.errors.ParserError which is confusing for users; thus,
94+
we rewrite the error with an explanation of what it means and how to fix it.
9195
"""
9296
# Load in file lines
9397
holdfile = None
@@ -144,16 +148,26 @@ def load_template_to_dataframe(fn, index='sample_name'):
144148
# comment:
145149
# using the tab character as "comment" we remove rows that are
146150
constituted only by delimiters, i.e. empty rows.
147-
template = pd.read_csv(
148-
StringIO(''.join(holdfile)),
149-
sep='\t',
150-
dtype=str,
151-
encoding='utf-8',
152-
infer_datetime_format=False,
153-
keep_default_na=False,
154-
index_col=False,
155-
comment='\t',
156-
converters={index: lambda x: str(x).strip()})
151+
try:
152+
template = pd.read_csv(
153+
StringIO(''.join(holdfile)),
154+
sep='\t',
155+
dtype=str,
156+
encoding='utf-8',
157+
infer_datetime_format=False,
158+
keep_default_na=False,
159+
index_col=False,
160+
comment='\t',
161+
converters={index: lambda x: str(x).strip()})
162+
except pd.errors.ParserError as e:
163+
if 'tokenizing' in str(e):
164+
msg = ('Your file has more columns with values than headers. To '
165+
'fix, make sure to delete any extra rows or columns; they '
166+
'might look empty because they have spaces. Then upload '
167+
'and try again.')
168+
raise RuntimeError(msg)
169+
else:
170+
raise e
157171
# remove newlines and tabs from fields
158172
template.replace(to_replace='[\t\n\r\x0b\x0c]+', value='',
159173
regex=True, inplace=True)

qiita_db/support_files/qiita-db.dbs

Lines changed: 87 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -939,10 +939,10 @@
939939
<column name="deprecated" type="boolean" jt="-7" >
940940
<defo><![CDATA[FALSE]]></defo>
941941
</column>
942-
<column name="creation_timestamp" type="timestamp" jt="-9" >
942+
<column name="creation_timestamp" type="timestamp" jt="93" >
943943
<defo><![CDATA[FALSE]]></defo>
944944
</column>
945-
<column name="modification_timestamp" type="timestamp" jt="-11" >
945+
<column name="modification_timestamp" type="timestamp" jt="93" >
946946
<defo><![CDATA[FALSE]]></defo>
947947
</column>
948948
<index name="pk_prep_template" unique="PRIMARY_KEY" >
@@ -1686,90 +1686,90 @@ Controlled Vocabulary]]></comment>
16861686
</schema>
16871687
<connector name="PostgreSQL" database="PostgreSQL" host="localhost" port="5432" instance="qiita_test" user="ccowart" />
16881688
<layout name="qiita" id="Layout669806" confirmed="y" show_relation="columns" >
1689-
<comment><![CDATA[qiita]]></comment>
1690-
<entity schema="qiita" name="analysis" color="D0DEF5" x="224" y="880" />
1691-
<entity schema="qiita" name="analysis_filepath" color="C0D4F3" x="400" y="880" />
1692-
<entity schema="qiita" name="analysis_portal" color="A8C4EF" x="176" y="768" />
1693-
<entity schema="qiita" name="analysis_sample" color="D0DEF5" x="256" y="1072" />
1694-
<entity schema="qiita" name="analysis_users" color="D0DEF5" x="48" y="880" />
1695-
<entity schema="qiita" name="archive_feature_value" color="B2CDF7" x="1472" y="1552" />
1696-
<entity schema="qiita" name="archive_merging_scheme" color="B2CDF7" x="1248" y="1552" />
1697-
<entity schema="qiita" name="artifact" color="B2CDF7" x="1200" y="960" />
1698-
<entity schema="qiita" name="artifact_filepath" color="B2CDF7" x="1040" y="1040" />
1699-
<entity schema="qiita" name="artifact_output_processing_job" color="B2CDF7" x="1760" y="1456" />
1700-
<entity schema="qiita" name="artifact_processing_job" color="B2CDF7" x="1744" y="1344" />
1701-
<entity schema="qiita" name="artifact_type" color="D0DEF5" x="1408" y="960" />
1702-
<entity schema="qiita" name="artifact_type_filepath_type" color="B2CDF7" x="1376" y="816" />
1703-
<entity schema="qiita" name="checksum_algorithm" color="B2CDF7" x="736" y="1040" />
1704-
<entity schema="qiita" name="column_controlled_vocabularies" color="D0DEF5" x="272" y="1248" />
1705-
<entity schema="qiita" name="column_ontology" color="D0DEF5" x="272" y="1504" />
1706-
<entity schema="qiita" name="command_output" color="B2CDF7" x="1760" y="1216" />
1707-
<entity schema="qiita" name="command_parameter" color="B2CDF7" x="2176" y="1040" />
1708-
<entity schema="qiita" name="controlled_vocab" color="D0DEF5" x="48" y="1248" />
1709-
<entity schema="qiita" name="controlled_vocab_values" color="D0DEF5" x="48" y="1376" />
1710-
<entity schema="qiita" name="data_directory" color="B2CDF7" x="576" y="1008" />
1711-
<entity schema="qiita" name="data_type" color="D0DEF5" x="704" y="1200" />
1712-
<entity schema="qiita" name="default_parameter_set" color="B2CDF7" x="2416" y="1168" />
1713-
<entity schema="qiita" name="default_workflow" color="B2CDF7" x="2656" y="1120" />
1714-
<entity schema="qiita" name="default_workflow_edge" color="B2CDF7" x="2640" y="1312" />
1715-
<entity schema="qiita" name="default_workflow_edge_connections" color="B2CDF7" x="2384" y="1424" />
1716-
<entity schema="qiita" name="default_workflow_node" color="B2CDF7" x="2416" y="1280" />
1717-
<entity schema="qiita" name="ebi_run_accession" color="B2CDF7" x="1344" y="1184" />
1718-
<entity schema="qiita" name="environmental_package" color="B2CDF7" x="2176" y="224" />
1719-
<entity schema="qiita" name="filepath" color="C0D4F3" x="640" y="832" />
1720-
<entity schema="qiita" name="filepath_type" color="C0D4F3" x="816" y="784" />
1721-
<entity schema="qiita" name="investigation" color="C0D4F3" x="2256" y="352" />
1722-
<entity schema="qiita" name="investigation_study" color="C0D4F3" x="2032" y="400" />
1723-
<entity schema="qiita" name="logging" color="C0D4F3" x="976" y="1376" />
1724-
<entity schema="qiita" name="message" color="A8C4EF" x="1504" y="1360" />
1725-
<entity schema="qiita" name="message_user" color="A8C4EF" x="1312" y="1344" />
1726-
<entity schema="qiita" name="mixs_field_description" color="D0DEF5" x="256" y="1360" />
1727-
<entity schema="qiita" name="oauth_identifiers" color="B7C8E3" x="2352" y="768" />
1728-
<entity schema="qiita" name="oauth_software" color="B2CDF7" x="2192" y="768" />
1729-
<entity schema="qiita" name="ontology" color="D0DEF5" x="576" y="1488" />
1730-
<entity schema="qiita" name="parameter_artifact_type" color="B2CDF7" x="1760" y="1120" />
1731-
<entity schema="qiita" name="parent_artifact" color="B2CDF7" x="1040" y="928" />
1732-
<entity schema="qiita" name="parent_processing_job" color="B2CDF7" x="2016" y="1472" />
1733-
<entity schema="qiita" name="per_study_tags" color="B2CDF7" x="2160" y="496" />
1734-
<entity schema="qiita" name="portal_type" color="C0D4F3" x="816" y="640" />
1735-
<entity schema="qiita" name="prep_template" color="B2CDF7" x="1232" y="512" />
1736-
<entity schema="qiita" name="prep_template_filepath" color="B2CDF7" x="1024" y="512" />
1737-
<entity schema="qiita" name="prep_template_processing_job" color="B2CDF7" x="1728" y="1008" />
1738-
<entity schema="qiita" name="prep_template_sample" color="D0DEF5" x="1136" y="304" />
1739-
<entity schema="qiita" name="prep_y" color="D0DEF5" x="992" y="320" />
1740-
<entity schema="qiita" name="processing_job" color="B2CDF7" x="1952" y="1168" />
1741-
<entity schema="qiita" name="processing_job_resource_allocation" color="C1D8EE" x="2624" y="1456" />
1742-
<entity schema="qiita" name="processing_job_status" color="B2CDF7" x="1760" y="1552" />
1743-
<entity schema="qiita" name="processing_job_validator" color="B2CDF7" x="2176" y="1472" />
1744-
<entity schema="qiita" name="processing_job_workflow" color="B2CDF7" x="2176" y="1584" />
1745-
<entity schema="qiita" name="processing_job_workflow_root" color="B2CDF7" x="2416" y="1584" />
1746-
<entity schema="qiita" name="publication" color="B2CDF7" x="2016" y="768" />
1747-
<entity schema="qiita" name="qiita_user" color="D0DEF5" x="592" y="384" />
1748-
<entity schema="qiita" name="reference" color="C0D4F3" x="2016" y="1584" />
1749-
<entity schema="qiita" name="restrictions" color="B2CDF7" x="1536" y="400" />
1750-
<entity schema="qiita" name="sample_template_filepath" color="B2CDF7" x="1040" y="656" />
1751-
<entity schema="qiita" name="sample_x" color="D0DEF5" x="1536" y="288" />
1752-
<entity schema="qiita" name="severity" color="C0D4F3" x="1120" y="1376" />
1753-
<entity schema="qiita" name="software" color="B2CDF7" x="2416" y="992" />
1754-
<entity schema="qiita" name="software_artifact_type" color="B2CDF7" x="1984" y="1008" />
1755-
<entity schema="qiita" name="software_command" color="B2CDF7" x="2176" y="1264" />
1756-
<entity schema="qiita" name="software_publication" color="B2CDF7" x="1984" y="864" />
1757-
<entity schema="qiita" name="software_type" color="B2CDF7" x="2640" y="992" />
1758-
<entity schema="qiita" name="stats_daily" color="B2CDF7" x="2496" y="368" />
1759-
<entity schema="qiita" name="study" color="D0DEF5" x="1792" y="288" />
1760-
<entity schema="qiita" name="study_artifact" color="B2CDF7" x="1600" y="800" />
1761-
<entity schema="qiita" name="study_environmental_package" color="B2CDF7" x="2096" y="80" />
1762-
<entity schema="qiita" name="study_person" color="C0D4F3" x="2016" y="176" />
1763-
<entity schema="qiita" name="study_portal" color="A8C4EF" x="1856" y="80" />
1764-
<entity schema="qiita" name="study_prep_template" color="D0DEF5" x="1504" y="528" />
1765-
<entity schema="qiita" name="study_publication" color="B2CDF7" x="1856" y="768" />
1766-
<entity schema="qiita" name="study_sample" color="D0DEF5" x="1360" y="288" />
1767-
<entity schema="qiita" name="study_tags" color="B2CDF7" x="2304" y="528" />
1768-
<entity schema="qiita" name="study_users" color="D0DEF5" x="1776" y="176" />
1769-
<entity schema="qiita" name="term" color="D0DEF5" x="736" y="1488" />
1770-
<entity schema="qiita" name="timeseries_type" color="C0D4F3" x="2016" y="576" />
1771-
<entity schema="qiita" name="user_level" color="D0DEF5" x="432" y="352" />
1772-
<entity schema="qiita" name="visibility" color="B2CDF7" x="1040" y="1152" />
1689+
<comment><![CDATA[qiita-db]]></comment>
1690+
<entity schema="qiita" name="analysis" color="D0DEF5" x="192" y="896" />
1691+
<entity schema="qiita" name="analysis_filepath" color="C0D4F3" x="384" y="848" />
1692+
<entity schema="qiita" name="analysis_portal" color="A8C4EF" x="224" y="736" />
1693+
<entity schema="qiita" name="analysis_sample" color="D0DEF5" x="368" y="976" />
1694+
<entity schema="qiita" name="analysis_users" color="D0DEF5" x="80" y="800" />
1695+
<entity schema="qiita" name="archive_feature_value" color="B2CDF7" x="1472" y="1472" />
1696+
<entity schema="qiita" name="archive_merging_scheme" color="B2CDF7" x="1248" y="1472" />
1697+
<entity schema="qiita" name="artifact" color="B2CDF7" x="1200" y="880" />
1698+
<entity schema="qiita" name="artifact_filepath" color="B2CDF7" x="1040" y="960" />
1699+
<entity schema="qiita" name="artifact_output_processing_job" color="B2CDF7" x="1760" y="1376" />
1700+
<entity schema="qiita" name="artifact_processing_job" color="B2CDF7" x="1728" y="1280" />
1701+
<entity schema="qiita" name="artifact_type" color="D0DEF5" x="1408" y="880" />
1702+
<entity schema="qiita" name="artifact_type_filepath_type" color="B2CDF7" x="1376" y="736" />
1703+
<entity schema="qiita" name="checksum_algorithm" color="B2CDF7" x="736" y="960" />
1704+
<entity schema="qiita" name="column_controlled_vocabularies" color="D0DEF5" x="272" y="1168" />
1705+
<entity schema="qiita" name="column_ontology" color="D0DEF5" x="272" y="1424" />
1706+
<entity schema="qiita" name="command_output" color="B2CDF7" x="1728" y="1136" />
1707+
<entity schema="qiita" name="command_parameter" color="B2CDF7" x="2192" y="960" />
1708+
<entity schema="qiita" name="controlled_vocab" color="D0DEF5" x="48" y="1168" />
1709+
<entity schema="qiita" name="controlled_vocab_values" color="D0DEF5" x="48" y="1296" />
1710+
<entity schema="qiita" name="data_directory" color="B2CDF7" x="576" y="928" />
1711+
<entity schema="qiita" name="data_type" color="D0DEF5" x="704" y="1120" />
1712+
<entity schema="qiita" name="default_parameter_set" color="B2CDF7" x="2416" y="1104" />
1713+
<entity schema="qiita" name="default_workflow" color="B2CDF7" x="2656" y="1040" />
1714+
<entity schema="qiita" name="default_workflow_edge" color="B2CDF7" x="2640" y="1232" />
1715+
<entity schema="qiita" name="default_workflow_edge_connections" color="B2CDF7" x="2384" y="1376" />
1716+
<entity schema="qiita" name="default_workflow_node" color="B2CDF7" x="2416" y="1232" />
1717+
<entity schema="qiita" name="ebi_run_accession" color="B2CDF7" x="1344" y="1104" />
1718+
<entity schema="qiita" name="environmental_package" color="B2CDF7" x="2176" y="176" />
1719+
<entity schema="qiita" name="filepath" color="C0D4F3" x="640" y="752" />
1720+
<entity schema="qiita" name="filepath_type" color="C0D4F3" x="816" y="704" />
1721+
<entity schema="qiita" name="investigation" color="C0D4F3" x="2176" y="272" />
1722+
<entity schema="qiita" name="investigation_study" color="C0D4F3" x="2000" y="384" />
1723+
<entity schema="qiita" name="logging" color="C0D4F3" x="976" y="1296" />
1724+
<entity schema="qiita" name="message" color="A8C4EF" x="1504" y="1280" />
1725+
<entity schema="qiita" name="message_user" color="A8C4EF" x="1312" y="1264" />
1726+
<entity schema="qiita" name="mixs_field_description" color="D0DEF5" x="272" y="1264" />
1727+
<entity schema="qiita" name="oauth_identifiers" color="B7C8E3" x="2352" y="688" />
1728+
<entity schema="qiita" name="oauth_software" color="B2CDF7" x="2192" y="688" />
1729+
<entity schema="qiita" name="ontology" color="D0DEF5" x="576" y="1408" />
1730+
<entity schema="qiita" name="parameter_artifact_type" color="B2CDF7" x="1728" y="1040" />
1731+
<entity schema="qiita" name="parent_artifact" color="B2CDF7" x="1040" y="848" />
1732+
<entity schema="qiita" name="parent_processing_job" color="B2CDF7" x="2016" y="1392" />
1733+
<entity schema="qiita" name="per_study_tags" color="B2CDF7" x="2192" y="528" />
1734+
<entity schema="qiita" name="portal_type" color="C0D4F3" x="768" y="560" />
1735+
<entity schema="qiita" name="prep_template" color="B2CDF7" x="1232" y="464" />
1736+
<entity schema="qiita" name="prep_template_filepath" color="B2CDF7" x="1024" y="416" />
1737+
<entity schema="qiita" name="prep_template_processing_job" color="B2CDF7" x="1728" y="928" />
1738+
<entity schema="qiita" name="prep_template_sample" color="D0DEF5" x="1152" y="304" />
1739+
<entity schema="qiita" name="prep_y" color="D0DEF5" x="992" y="304" />
1740+
<entity schema="qiita" name="processing_job" color="B2CDF7" x="1968" y="1072" />
1741+
<entity schema="qiita" name="processing_job_resource_allocation" color="C1D8EE" x="2656" y="1376" />
1742+
<entity schema="qiita" name="processing_job_status" color="B2CDF7" x="1728" y="1504" />
1743+
<entity schema="qiita" name="processing_job_validator" color="B2CDF7" x="2192" y="1392" />
1744+
<entity schema="qiita" name="processing_job_workflow" color="B2CDF7" x="2192" y="1504" />
1745+
<entity schema="qiita" name="processing_job_workflow_root" color="B2CDF7" x="2448" y="1504" />
1746+
<entity schema="qiita" name="publication" color="B2CDF7" x="1936" y="688" />
1747+
<entity schema="qiita" name="qiita_user" color="D0DEF5" x="704" y="288" />
1748+
<entity schema="qiita" name="reference" color="C0D4F3" x="2016" y="1504" />
1749+
<entity schema="qiita" name="restrictions" color="B2CDF7" x="1584" y="400" />
1750+
<entity schema="qiita" name="sample_template_filepath" color="B2CDF7" x="1008" y="560" />
1751+
<entity schema="qiita" name="sample_x" color="D0DEF5" x="1568" y="288" />
1752+
<entity schema="qiita" name="severity" color="C0D4F3" x="1120" y="1296" />
1753+
<entity schema="qiita" name="software" color="B2CDF7" x="2432" y="896" />
1754+
<entity schema="qiita" name="software_artifact_type" color="B2CDF7" x="1984" y="944" />
1755+
<entity schema="qiita" name="software_command" color="B2CDF7" x="2192" y="1184" />
1756+
<entity schema="qiita" name="software_publication" color="B2CDF7" x="1920" y="784" />
1757+
<entity schema="qiita" name="software_type" color="B2CDF7" x="2640" y="912" />
1758+
<entity schema="qiita" name="stats_daily" color="B2CDF7" x="2464" y="400" />
1759+
<entity schema="qiita" name="study" color="D0DEF5" x="1776" y="208" />
1760+
<entity schema="qiita" name="study_artifact" color="B2CDF7" x="1600" y="720" />
1761+
<entity schema="qiita" name="study_environmental_package" color="B2CDF7" x="2176" y="80" />
1762+
<entity schema="qiita" name="study_person" color="C0D4F3" x="2000" y="224" />
1763+
<entity schema="qiita" name="study_portal" color="A8C4EF" x="1920" y="80" />
1764+
<entity schema="qiita" name="study_prep_template" color="D0DEF5" x="1536" y="560" />
1765+
<entity schema="qiita" name="study_publication" color="B2CDF7" x="1792" y="688" />
1766+
<entity schema="qiita" name="study_sample" color="D0DEF5" x="1376" y="288" />
1767+
<entity schema="qiita" name="study_tags" color="B2CDF7" x="2176" y="416" />
1768+
<entity schema="qiita" name="study_users" color="D0DEF5" x="1776" y="96" />
1769+
<entity schema="qiita" name="term" color="D0DEF5" x="736" y="1408" />
1770+
<entity schema="qiita" name="timeseries_type" color="C0D4F3" x="2000" y="496" />
1771+
<entity schema="qiita" name="user_level" color="D0DEF5" x="560" y="272" />
1772+
<entity schema="qiita" name="visibility" color="B2CDF7" x="1040" y="1072" />
17731773
<group name="Group_analyses" color="C4E0F9" >
17741774
<comment><![CDATA[analysis tables]]></comment>
17751775
<entity schema="qiita" name="analysis" />
@@ -1895,4 +1895,4 @@ ALTER TABLE oauth_software ADD CONSTRAINT fk_oauth_software FOREIGN KEY ( client
18951895
]]></string>
18961896
</script>
18971897
</layout>
1898-
</project>
1898+
</project>

qiita_db/support_files/qiita-db.html

Lines changed: 1556 additions & 1592 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)