Skip to content

Commit

Permalink
Cmsearch: added missing args to command (#1328)
Browse files Browse the repository at this point in the history
* added missing args to command

* bump version

* Bump infernal version, update cmscan test output, and modify cmpress command section

* cmsearch version suffix back to 0

* Lint warnings

* change mapali_cond option order in cmalign

* Lint warning

* Fix cmalign

* data_table set up

* Move DB selector to macros and add cached DB tests

* Re-add unintentionally removed tests

* Fix cmscan

---------

Co-authored-by: Björn Grüning <bjoern@gruenings.eu>
Co-authored-by: paulzierep <paul.zierep@googlemail.com>
  • Loading branch information
3 people authored Nov 11, 2024
1 parent 7b4e1e8 commit 09c1d4a
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 144 deletions.
51 changes: 25 additions & 26 deletions tools/rna_tools/infernal/cmalign.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro="xrefs"/>
<expand macro="requirements" />
<expand macro="stdio" />
<expand macro="xrefs"/>
<command>
<![CDATA[
Expand Down Expand Up @@ -33,7 +33,7 @@
--mxsize $speed_memory_cont.hmm_banding.mxsize
#end if
#if $other_opts.mapali_opt.mapali_cond
#if $other_opts.mapali_opt.mapali_cond=="true"
--mapali '$other_opts.mapali_opt.mapali'
$other_opts.mapali_opt.mapstr
#end if
Expand Down Expand Up @@ -68,26 +68,7 @@
<inputs>

<param name="seqdb" type="data" format="fasta" label="Sequence database"/>

<conditional name="cm_opts">
<param name="cm_opts_selector" type="select" label="Subject covariance models">
<option value="db" selected="True">Locally installed covariance models</option>
<option value="histdb">Covariance model from your history</option>
</param>
<when value="db">
<param name="database" type="select" label="Covariance models">
<options from_file="infernal.loc">
<column name="value" index="0"/>
<column name="name" index="1"/>
<column name="path" index="2"/>
</options>
</param>
</when>
<when value="histdb">
<param name="cmfile" type="data" format="cm" label="Covariance models file from the history."/>
</when>
</conditional>

<expand macro="DB" />
<param argument="-g" truevalue="-g" falsevalue="" checked="False" type="boolean"
label="Turn on the glocal alignment algorithm" help="... global with respect to the query model and local with respect to the target database."/>

Expand All @@ -105,8 +86,11 @@
</when>
</conditional>
<conditional name="notrunc_opt">
<param argument="--notrunc" truevalue="" falsevalue="--notrunc" checked="true" type="boolean"
label="Use truncated alignment algorithm" help=""/>
<param argument="--notrunc" type="select"
label="Use truncated alignment algorithm">
<option value="">Yes</option>
<option value="--notrunc">No</option>
</param>
<when value=""></when>
<when value="--notrunc">
<param argument="--sub" type="boolean" truevalue="--sub" falsevalue="" checked="false"
Expand Down Expand Up @@ -143,8 +127,11 @@

<section name="other_opts" title="Other options">
<conditional name="mapali_opt">
<param name="mapali_cond" type="boolean" checked="false"
label="Read the aligment file that used to build the CM" help="The alignment from the file is held fixed. This allows you to align sequences to a model with cmalign and view them in the context of an existing trusted multiple alignment."/>
<param name="mapali_cond" type="select"
label="Read the aligment file that used to build the CM" help="The alignment from the file is held fixed. This allows you to align sequences to a model with cmalign and view them in the context of an existing trusted multiple alignment.">
<option value="false">No</option>
<option value="true">Yes</option>
</param>
<when value="true">
<param argument="--mapali" type="data" format="fasta"
label="The aligment file that used to build the CM" help=""/>
Expand Down Expand Up @@ -189,6 +176,18 @@
</assert_contents>
</output>
</test>
<!-- Test the locally installed ("db") covariance model branch of cm_opts:
     the model is chosen by its database entry instead of a history dataset,
     and the alignment output must contain the expected sequence fragment. -->
<test>
<conditional name="cm_opts">
<param name="cm_opts_selector" value="db"/>
<param name="database" value="cmalign_input2_tRNA5.c.cm"/>
</conditional>
<param name="seqdb" value="cmalign_input_mrum_tRNAs10.fa"/>
<output name="outfile">
<assert_contents>
<has_text text="GGAGCUAUAGCUCAAU..GGC"/>
</assert_contents>
</output>
</test>
</tests>
<help>
<![CDATA[
Expand Down
20 changes: 11 additions & 9 deletions tools/rna_tools/infernal/cmbuild.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
<import>macros.xml</import>
</macros>
<!--parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" merge_outputs="cmfile_outfile"></parallelism-->
<expand macro="xrefs"/>
<expand macro="requirements" />
<expand macro="stdio" />
<expand macro="xrefs"/>
<command>
<![CDATA[
cmbuild
Expand Down Expand Up @@ -196,19 +196,22 @@
<option value="--cyk">align with the CYK algorithm</option>
</param>

<param name="refine_output" truevalue="" falsevalue="" checked="False" type="boolean"
<param name="refine_output" truevalue="--refine" falsevalue="" checked="False" type="boolean"
label="Output the refined alignment file as it is used to build the covariance model" help=""/>

</when>
</conditional>

<param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean"
<param name="is_summery_output" truevalue="-o" falsevalue="" checked="False" type="boolean"
label="Output a summery file?" help=""/>

<!-- calibrate options -->
<conditional name="Calibrate">
<param name="selector" type="boolean" checked="true" label="Calibrate the covariance model"
help="A CM file must be calibrated with cmcalibrate before it can be used in cmsearch or cmscan. cmcalibrate is very slow. It takes a couple of hours to calibrate a single average sized CM on a single CPU"/>
<param name="selector" type="select" label="Calibrate the covariance model"
help="A CM file must be calibrated with cmcalibrate before it can be used in cmsearch or cmscan. cmcalibrate is very slow. It takes a couple of hours to calibrate a single average sized CM on a single CPU">
<option value="true">Yes</option>
<option value="false">No</option>
</param>
<when value="false"/>
<when value="true">
<param argument="-L" type="float" value="1.6" min="0.01" max="160" label="Total length of random sequences to search" help="Set random seq length to search in Mb (megabases)"/>
Expand Down Expand Up @@ -325,7 +328,7 @@
</outputs>

<tests>
<test>
<test expect_num_outputs="1">
<param name="alignment_infile" value="cmbuild_input_tRNA5.sto"/>
<conditional name="Calibrate">
<param name="selector" value="true"/>
Expand All @@ -338,7 +341,7 @@
</output>
</test>
<!-- Test emax seq parameter -->
<test>
<test expect_num_outputs="1">
<param name="alignment_infile" value="cmbuild_input_tRNA5.sto"/>
<conditional name="Calibrate">
<param name="selector" value="true"/>
Expand Down Expand Up @@ -422,8 +425,7 @@ These options control how consensus columns are defined in an alignment.
cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example, two identical sequences would typically each receive half the weight that one sequence would. These options control which algorithm gets used.
- *--wgb*: Use the Henikoff position-based sequence weighting scheme [Henikoff
and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
- *--wgb*: Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
- *--wgsc*: Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994].
- *--wnone*: Turn sequence weighting off; e.g. explicitly set all sequence weights to 1.0.
- *--wgiven*: Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. The default is to determine new sequence weights by the Gerstein/Sonnhammer/Chothia algorithm, ignoring any annotated weights.
Expand Down
51 changes: 20 additions & 31 deletions tools/rna_tools/infernal/cmpress.xml
Original file line number Diff line number Diff line change
@@ -1,47 +1,27 @@
<tool id="infernal_cmpress" name="cmpress" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
<tool id="infernal_cmpress" name="cmpress" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
<description> Prepare a covariance model database for cmscan</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="xrefs"/>
<expand macro="requirements" />
<expand macro="stdio" />
<expand macro="xrefs"/>
<command>
<command detect_errors="aggressive">
<![CDATA[
## CM file from the history or stored as database on disc
mkdir -p ./cm_model &&
#if str($cm_opts.cm_opts_selector) == "db":
cmfile_path='$cm_opts.database.fields.path'
ln -s '$cm_opts.database.fields.path' ./cm_model/
#else:
##'$cm_opts.cmfile'
cmfile_path='$cmfile'
ln -s '$cmfile' ./cm_model/
#end if
&&
cmpress -F "\$cmfile_path" &&
##cmpress creates four files in the same directory of the cm file
cd `dirname "\$cmfile_path"` &&
tar -cvf '$outfile' `basename "\$cmfile_path"`.i1*
cmpress -F ./cm_model/* &&
cd ./cm_model/ &&
tar -cvf '$outfile' *.i1f *.i1i *.i1m *.i1p
]]>
</command>
<inputs>
<conditional name="cm_opts">
<param name="cm_opts_selector" type="select" label="Subject covariance models &lt;cmdb&gt; ">
<option value="db" >Locally installed covariance models</option>
<option value="histdb" selected="True">Covariance model from your history</option>
</param>
<when value="db">
<param name="database" type="select" label="Covariance models">
<options from_file="infernal.loc">
<column name="value" index="0"/>
<column name="name" index="1"/>
<column name="path" index="2"/>
</options>
</param>
</when>
<when value="histdb">
<param name="cmfile" type="data" format="cm" label="Covariance models file from the history."/>
</when>
</conditional>

<expand macro="DB" />
</inputs>
<outputs>
<data format="tar" name="outfile" label="cmpress on ${on_string}"/>
Expand All @@ -58,7 +38,16 @@
</assert_stdout>

</test>
<!-- Test the locally installed ("db") covariance model branch of cm_opts:
     the model is chosen by its database entry, and the tool's stdout must
     contain the "Working... done." message. -->
<test>
<conditional name="cm_opts">
<param name="cm_opts_selector" value="db"/>
<param name="database" value="cmpress_input_minifam.cm"/>
</conditional>
<assert_stdout>
<has_text text="Working... done."/>
</assert_stdout>

</test>
</tests>
<help>
<![CDATA[
Expand Down Expand Up @@ -86,4 +75,4 @@ For further questions please refere to the Infernal `Userguide <http://eddylab.o
<expand macro="citations" />


</tool>
</tool>
51 changes: 22 additions & 29 deletions tools/rna_tools/infernal/cmscan.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,21 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro="xrefs"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<expand macro="xrefs"/>
<command>
<command detect_errors="aggressive">
<![CDATA[
## a temp file is needed, because the standard tabular output from infernal is not useful in Galaxy
## it will be converted to a tab delimited file and piped to Galaxy
temp_tabular_output=\$(mktemp) &&
#if str($cm_opts.cm_opts_selector) == "histdb":
ln -s '$cm_opts.cmfile' cmdb.cm &&
ln -s '$cm_opts.cmfile' cmdb.cm
#else:
ln -s '$cm_opts.database.fields.path' cmdb.cm
#end if
&&
tar xvf '$aux_files' &&
ln -s `find *.i1f` cmdb.cm.i1f &&
ln -s `find *.i1i` cmdb.cm.i1i &&
Expand Down Expand Up @@ -66,12 +68,7 @@
#end if
#end if
## CM file from the history or stored as database on disc
#if str($cm_opts.cm_opts_selector) == "db":
'$cm_opts.database.fields.path'
#else:
##'$cm_opts.cmfile'
cmdb.cm
#end if
cmdb.cm
## sequence file
'$seqdb'
######### Parse the output file in order to fix a problem reported in https://help.galaxyproject.org/t/messy-infernal-cmscan-output/5984
Expand All @@ -92,25 +89,7 @@
</command>
<inputs>
<param name="seqdb" type="data" format="fasta" label="Sequence database &lt;seqfile&gt;"/>

<conditional name="cm_opts">
<param name="cm_opts_selector" type="select" label="Subject covariance models &lt;cmdb&gt; ">
<option value="db" >Locally installed covariance models</option>
<option value="histdb" selected="True">Covariance model from your history</option>
</param>
<when value="db">
<param name="database" type="select" label="Covariance models">
<options from_file="infernal.loc">
<column name="value" index="0"/>
<column name="name" index="1"/>
<column name="path" index="2"/>
</options>
</param>
</when>
<when value="histdb">
<param name="cmfile" type="data" format="cm" label="Covariance models file from the history."/>
</when>
</conditional>
<expand macro="DB" />
<param name="aux_files" type="data" format="tar" label="Auxillury files" help="A tar file contains the four auxillury files suffixed .i1{fimp}. These files are generated after pressing the cm files using cmpress"/>

<param argument="-g" truevalue="-g" falsevalue="" checked="False" type="boolean"
Expand Down Expand Up @@ -258,6 +237,20 @@
</assert_contents>
</output>
</test>
<!-- Test the locally installed ("db") covariance model branch of cm_opts:
     the model is chosen by its database entry, with the pressed auxiliary
     files supplied as a tar archive. The tabular output is compared against
     test_cmscan.tabular (lines_diff of 8) and must have 15 lines and contain
     the hit AAGA01015927.1. -->
<test>
<conditional name="cm_opts">
<param name="cm_opts_selector" value="db"/>
<param name="database" value="minifam.cm" />
</conditional>
<param name="aux_files" value="minifam.tar" ftype="tar"/>
<param name="seqdb" value="metag-example.fa"/>
<output name="outfile" file="test_cmscan.tabular" ftype="tabular" lines_diff="8">
<assert_contents>
<has_n_lines n="15"/>
<has_text text="AAGA01015927.1"/>
</assert_contents>
</output>
</test>

</tests>
<help>
Expand Down
Loading

0 comments on commit 09c1d4a

Please sign in to comment.