Skip to content

Commit

Permalink
dada2 fixes
Browse files Browse the repository at this point in the history
detected here galaxyproject#2701

- data manager: real bug fix (write the output JSON with `open()` instead of `file()`)
- bug fix in makeSequenceTable: the Cheetah test of the `plot` variable
was wrong
- allow larger delta for tests on Rdata and pdf (mostly 1/2 of the file
size, but in any case less than the file size; essentially this is now a
test for file presence)
  • Loading branch information
bernt-matthias committed Nov 30, 2019
1 parent 78d0d06 commit 0e4b1be
Show file tree
Hide file tree
Showing 14 changed files with 18 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0"?>
<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.0.7">
<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.0.8">
<description>Download reference databases</description>
<command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/data_manager.py'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def url_download(url, fname, workdir):
"""
download url to workdir/fname
"""
import logging
logging.error("DL %s"%url)
file_path = os.path.join(workdir, fname)
if not os.path.exists(workdir):
os.makedirs(workdir)
Expand Down Expand Up @@ -120,7 +122,7 @@ def remote_dataset(dataset, outjson):
data_manager_entry['path'] = dataset + ".species"
data_manager_json["data_tables"]["dada2_species"] = data_manager_entry

with file(outjson, 'w') as jf:
with open(outjson, 'w') as jf:
jf.write(json.dumps(data_manager_json))


Expand Down
6 changes: 3 additions & 3 deletions tools/dada2/dada2_dada.xml
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,15 @@ dada_result <- dada(derep, err,
<param name="batch_cond|derep" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastq.gz" />
<param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
<output_collection name="data_collection" type="list">
<element name="filterAndTrim_F3D0_R1.fq.gz" file="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
<element name="filterAndTrim_F3D0_R1.fq.gz" file="dada_F3D0_R1.Rdata" ftype="dada2_dada" compare="sim_size" delta="10000"/>
</output_collection>
</test>
<!-- default, batch -->
<test>
<param name="batch_cond|batch_select" value="yes"/>
<param name="batch_cond|derep" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastq.gz" />
<param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
<output name="dada" value="dada_F3D0_R1.Rdata" ftype="dada2_dada" />
<output name="dada" value="dada_F3D0_R1.Rdata" ftype="dada2_dada" compare="sim_size" delta="10000"/>
</test>
<!-- test non-default options -->
<test>
Expand All @@ -121,7 +121,7 @@ dada_result <- dada(derep, err,
<param name="batch_cond|pool" value="pseudo"/>
<param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
<output_collection name="data_collection" type="list">
<element name="filterAndTrim_F3D0_R1.fq.gz" file="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
<element name="filterAndTrim_F3D0_R1.fq.gz" file="dada_F3D0_R1.Rdata" ftype="dada2_dada" compare="sim_size" delta="10000"/>
</output_collection>
</test>
</tests>
Expand Down
8 changes: 4 additions & 4 deletions tools/dada2/dada2_learnErrors.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ all reads are counted and contribute to estimating the error rates."/>
<tests>
<test>
<param name="fls" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastqsanger.gz"/>
<output name="errors" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates"/>
<output name="plot" value="learnErrors_F3D0_R1.pdf" ftype="pdf" compare="sim_size" />
<output name="errors" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" compare="sim_size" delta="7000"/>
<output name="plot" value="learnErrors_F3D0_R1.pdf" ftype="pdf" compare="sim_size" delta="5000" />
</test>
<!-- test w non-default parameters -->
<test>
Expand All @@ -73,8 +73,8 @@ all reads are counted and contribute to estimating the error rates."/>
<param name="plotopt|err_out" value="FALSE" />
<param name="plotopt|err_in" value="TRUE" />
<param name="plotopt|nominalQ" value="FALSE"/>
<output name="errors" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" compare="sim_size" delta="14000"/>
<output name="plot" value="learnErrors_F3D0_R1.pdf" ftype="pdf" compare="sim_size" />
<output name="errors" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" compare="sim_size" delta="13000"/>
<output name="plot" value="learnErrors_F3D0_R1.pdf" ftype="pdf" compare="sim_size" delta="5000" />
</test>
<!-- TODO test w multiple inputs -->
</tests>
Expand Down
8 changes: 4 additions & 4 deletions tools/dada2/dada2_makeSequenceTable.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
@WRITE_FOO@
library(dada2, quietly=T)
#if $plot == "yes"
#if $plot
library(ggplot2, quietly=T)
#end if
Expand All @@ -29,7 +29,7 @@ seqtab <- makeSequenceTable(samples, orderBy = "$orderBy")
reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum)
df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen)
#if $plot == "yes"
#if $plot
pdf( '$plot_output' )
ggplot(data=df, aes(x=length, y=count)) +
geom_col() +
Expand Down Expand Up @@ -75,8 +75,8 @@ write.data( seqtab, '$stable', "dada2_sequencetable" )
<tests>
<test expect_num_outputs="2">
<param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
<output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" lines_diff="2"/>
<output name="plot_output" value="makeSequenceTable_F3D0.pdf" ftype="pdf" />
<output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" lines_diff="2"/>
<output name="plot_output" value="makeSequenceTable_F3D0.pdf" ftype="pdf" compare="sim_size" delta="3000" />
</test>
<test expect_num_outputs="1">
<param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
Expand Down
4 changes: 2 additions & 2 deletions tools/dada2/dada2_mergePairs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ saveRDS(merged, file='$merged')
<param name="derepF" ftype="fastq.gz" value="filterAndTrim_F3D0_R1.fq.gz"/>
<param name="dadaR" ftype="dada2_dada" value="dada_F3D0_R2.Rdata"/>
<param name="derepR" ftype="fastq.gz" value="filterAndTrim_F3D0_R2.fq.gz"/>
<output name="merged" value="mergePairs_F3D0.Rdata" ftype="dada2_mergepairs" />
<output name="merged" value="mergePairs_F3D0.Rdata" ftype="dada2_mergepairs" compare="sim_size" delta="400" />
</test>
<!-- test non-default options -->
<test>
<param name="dadaF" ftype="dada2_dada" value="dada_F3D0_R1.Rdata"/>
<param name="derepF" ftype="fastq.gz" value="filterAndTrim_F3D0_R1.fq.gz"/>
<param name="dadaR" ftype="dada2_dada" value="dada_F3D0_R2.Rdata"/>
<param name="derepR" ftype="fastq.gz" value="filterAndTrim_F3D0_R2.fq.gz"/>
<output name="merged" value="mergePairs_F3D0_nondefault.Rdata" ftype="dada2_mergepairs" />
<output name="merged" value="mergePairs_F3D0_nondefault.Rdata" ftype="dada2_mergepairs" compare="sim_size" delta="700"/>
<param name="minOverlap" value="8" />
<param name="maxMismatch" value="1"/>
<param name="justConcatenate" value="TRUE" />
Expand Down
2 changes: 1 addition & 1 deletion tools/dada2/dada2_removeBimeraDenovo.xml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ if(class(unqs)=="list"){
<test expect_num_outputs="1">
<param name="unqs" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
<param name="method" value="pooled"/>
<output name="stable_mergepairs" value="removeBimeraDenovo_F3D0_mergepairs.Rdata" ftype="dada2_mergepairs" />
<output name="stable_mergepairs" value="removeBimeraDenovo_F3D0_mergepairs.Rdata" ftype="dada2_mergepairs" compare="sim_size" delta="400"/>
</test>
</tests>
<help><![CDATA[
Expand Down
Binary file modified tools/dada2/test-data/complexity.pdf
Binary file not shown.
Binary file removed tools/dada2/test-data/derepFastq_F3D0_R1.Rdata
Binary file not shown.
Binary file removed tools/dada2/test-data/derepFastq_F3D0_R2.Rdata
Binary file not shown.
Binary file modified tools/dada2/test-data/learnErrors_F3D0_R1.pdf
Binary file not shown.
Binary file modified tools/dada2/test-data/learnErrors_F3D0_R2.pdf
Binary file not shown.
Binary file modified tools/dada2/test-data/makeSequenceTable_F3D0.pdf
Binary file not shown.
Binary file modified tools/dada2/test-data/qualityProfile.pdf
Binary file not shown.

0 comments on commit 0e4b1be

Please sign in to comment.