Skip to content

Commit

Permalink
Give megahit info on available memory (galaxyproject#5654)
Browse files Browse the repository at this point in the history
* give megahit info on available memory

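Otherwise, megahit falls back to its default of using 90% of the machine's
total memory, which does not work on shared HPC nodes where a job is only
allotted part of that memory.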
  • Loading branch information
bernt-matthias authored Nov 27, 2023
1 parent af2e922 commit 6442170
Showing 1 changed file with 26 additions and 22 deletions.
48 changes: 26 additions & 22 deletions tools/megahit/megahit_wrapper.xml
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
<?xml version='1.0' encoding='utf-8'?>
<tool id="megahit" name="MEGAHIT" version="@TOOL_VERSION@+galaxy0">
<tool id="megahit" name="MEGAHIT" version="@TOOL_VERSION@+galaxy1">
<description>for metagenomics assembly</description>
<xrefs>
<xref type="bio.tools">megahit</xref>
</xrefs>
<macros>
<token name="@TOOL_VERSION@">1.2.9</token>
</macros>
<xrefs>
<xref type="bio.tools">megahit</xref>
</xrefs>
<requirements>
<requirement type="package" version="@TOOL_VERSION@">megahit</requirement>
</requirements>
<version_command>megahit --version</version_command>
<command detect_errors="exit_code"><![CDATA[
## Pass the memory allotted to the job by Galaxy on to megahit.
## megahit -m expects the limit in bytes (a value between 0 and 1 is instead
## read as a fraction of total machine memory), so convert MB to bytes.
if [[ -n "\$GALAXY_MEMORY_MB" ]]; then
    MEMORY="-m \$((GALAXY_MEMORY_MB * 1024 * 1024))";
fi;
megahit
--num-cpu-threads \${GALAXY_SLOTS:-4}
#if $input_option.choice == 'paired'
Expand Down Expand Up @@ -52,6 +55,7 @@ megahit
${advanced_section.nolocal}
${advanced_section.kmin1pass}
--min-contig-len '${output_section.min_contig_len}'
\$MEMORY
&& cat megahit_out/log
]]></command>
<inputs>
Expand Down Expand Up @@ -88,36 +92,36 @@ megahit
</when>
</conditional>
<section name="basic_section" title="Basic assembly options" expanded="True">
<param name="min_count" argument="--min-count" type="integer" value="2" label="minimum multiplicity for filtering (k_min+1)-mers" help="(kmin+1)-mer with multiplicity lower than d (default 2, specified by --min-count option) will be discarded. You should be cautious to set d less than 2, which will lead to a much larger and noisy graph. We recommend using the default value 2 for metagenomics assembly. If you want to use MEGAHIT to do generic assemblies, please change this value according to the sequencing depth. (recommend --min-count 3 for >40x)."/>
<param argument="--min-count" type="integer" value="2" label="minimum multiplicity for filtering (k_min+1)-mers" help="(kmin+1)-mer with multiplicity lower than d (default 2, specified by --min-count option) will be discarded. You should be cautious to set d less than 2, which will lead to a much larger and noisy graph. We recommend using the default value 2 for metagenomics assembly. If you want to use MEGAHIT to do generic assemblies, please change this value according to the sequencing depth. (recommend --min-count 3 for >40x)."/>
<conditional name="k_mer">
<param name="k_mer_method" type="select" label="K-mer specification method">
<option value="klist_method">Specify list</option>
<option value="klim_method">Specify min, max, and step values</option>
</param>
<when value="klist_method">
<param name="k_list" argument="--k-list" type="text" value="21,29,39,59,79,99,119,141" label="Comma-separated list of kmer size" help="all must be odd, in the range 15-255, and with increments &lt;= 28"/>
<param argument="--k-list" type="text" value="21,29,39,59,79,99,119,141" label="Comma-separated list of kmer size" help="all must be odd, in the range 15-255, and with increments &lt;= 28"/>
</when>
<when value="klim_method">
<param name="k_min" argument="--k-min" type="integer" value="21" label="Minimum kmer size" max="255" help="Must be odd number. For ultra complex metagenomics data such as soil, a larger kmin, say 27, is recommended to reduce the complexity of the de Bruijn graph. Quality trimming is also recommended. For high-depth generic data, large --k-min (25 to 31) is recommended. Smaller --k-step, say 10, is more friendly to low-coverage datasets."/>
<param name="k_max" argument="--k-max" type="integer" value="141" label="Maximum kmer size" max="255" help="must be odd number"/>
<param name="k_step" argument="--k-step" type="integer" value="12" label="Increment of kmer size of each iteration" max="28" help="must be even number"/>
<param argument="--k-min" type="integer" value="21" label="Minimum kmer size" max="255" help="Must be odd number. For ultra complex metagenomics data such as soil, a larger kmin, say 27, is recommended to reduce the complexity of the de Bruijn graph. Quality trimming is also recommended. For high-depth generic data, large --k-min (25 to 31) is recommended. Smaller --k-step, say 10, is more friendly to low-coverage datasets."/>
<param argument="--k-max" type="integer" value="141" label="Maximum kmer size" max="255" help="must be odd number"/>
<param argument="--k-step" type="integer" value="12" label="Increment of kmer size of each iteration" max="28" help="must be even number"/>
</when>
</conditional>
</section>
<section name="advanced_section" title="Advanced assembly options" expanded="False">
<param name="nomercy" type="boolean" checked="false" truevalue="--no-mercy" falsevalue="" label="Do not add mercy kmers" help="Mercy kmers are specially designed for metagenomics assembly to recover low coverage sequences. For generic dataset >= 30x, MEGAHIT may generate better results with no mercy kmers." />
<param name="bubble_level" argument="--bubble-level" type="integer" value="2" min="0" max="2" label="Intensity of bubble merging (0-2), 0 to disable" />
<param name="merge_level" argument="--merge-level" type="text" value="20,0.95" label="Merge complex bubbles of length" />
<param name="prune_level" argument="--prune-level" type="integer" value="2" min="0" max="3" label="Strength of low depth pruning" />
<param name="prune_depth" argument="--prune-depth" type="integer" value="2" min="0" label="Remove unitigs with avg kmer depth less than this value" />
<param name="disconnect_ratio" argument="--disconnect-ratio" type="float" value="0.1" label="Disconnect unitigs if its depth is less than this ratio times the total depth of itself and its siblings" />
<param name="low_local_ratio" argument="--low-local-ratio" type="float" value="0.2" label="Remove unitigs if its depth is less than this ratio times the average depth of the neighborhoods" />
<param name="cleaning_rounds" argument="--cleaning-rounds" type="integer" value="5" label="Number of rounds for graph cleanning" />
<param argument="--bubble-level" type="integer" value="2" min="0" max="2" label="Intensity of bubble merging (0-2), 0 to disable" />
<param argument="--merge-level" type="text" value="20,0.95" label="Merge complex bubbles of length" />
<param argument="--prune-level" type="integer" value="2" min="0" max="3" label="Strength of low depth pruning" />
<param argument="--prune-depth" type="integer" value="2" min="0" label="Remove unitigs with avg kmer depth less than this value" />
<param argument="--disconnect-ratio" type="float" value="0.1" label="Disconnect unitigs if its depth is less than this ratio times the total depth of itself and its siblings" />
<param argument="--low-local-ratio" type="float" value="0.2" label="Remove unitigs if its depth is less than this ratio times the average depth of the neighborhoods" />
<param argument="--cleaning-rounds" type="integer" value="5" label="Number of rounds for graph cleaning" />
<param name="nolocal" type="boolean" checked="false" truevalue="--no-local" falsevalue="" label="Disable local assembly" />
<param name="kmin1pass" type="boolean" checked="false" truevalue="--kmin-1pass" falsevalue="" label="Use 1pass mode to build SdBG of k_min" />
</section>
<section name="output_section" title="Output options" expanded="True">
<param name="min_contig_len" argument="--min-contig-len" type="integer" value="200" label="Minimum length of contigs to output" />
<param argument="--min-contig-len" type="integer" value="200" label="Minimum length of contigs to output" />
<param name="show_intermediate_contigs" type="boolean" checked="false" label="Return intermediate contigs?"/>
</section>
</inputs>
Expand All @@ -129,7 +133,7 @@ megahit
</collection>
</outputs>
<tests>
<test>
<test expect_num_outputs="1">
<conditional name="input_option">
<param name="choice" value="single"/>
<param name="single_files" value="refExample.fa" ftype="fasta"/>
Expand All @@ -140,22 +144,22 @@ megahit
</assert_contents>
</output>
</test>
<test>
<test expect_num_outputs="1">
<conditional name="input_option">
<param name="choice" value="interleaved"/>
<param name="interleaved_file" value="interleaved-fq.fa"/>
</conditional>
<output name="output" file="interleaved_result.fa"/>
</test>
<test>
<test expect_num_outputs="1">
<conditional name="input_option">
<param name="choice" value="paired"/>
<param name="fastq_input1" value="paired-fq1.fa"/>
<param name="fastq_input2" value="paired-fq2.fa"/>
</conditional>
<output name="output" file="paired_result.fa"/>
</test>
<test>
<test expect_num_outputs="1">
<conditional name="input_option">
<param name="choice" value="paired_collection"/>
<conditional name="batchmode">
Expand Down

0 comments on commit 6442170

Please sign in to comment.