<!-- tools/fargene/fargene.xml -->

<tool id="fargene" name="fargene" version="@VERSION@+galaxy1">
    <!-- One-line summary of what the wrapped program does. -->
    <description>Fragmented antibiotic resistance gene identifier </description>
    <!-- Shared definitions live in macros.xml (not visible here). This file
         relies on it for the "requirements" and "citations" macros expanded
         below and, presumably, for the @VERSION@ token used in the tool
         version attribute. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry for this software. -->
    <xrefs>
        <xref type="bio.tools">fargene</xref>
    </xrefs>
    <expand macro="requirements" />
    <!-- Command used to report the version of the installed fargene executable. -->
    <version_command>fargene --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
    ## ------------------------------------------------------------------
    ## 1. Stage the inputs in the working directory under shell-safe names.
    ##    The extension given here (.fastq / .fasta) decides which glob
    ##    passed to --infiles below picks the files up.
    ## ------------------------------------------------------------------
    #import re
    #if $inputs.input_type == 'paired':
        #set $safename_R1 = re.sub('[^\w\-_\.]', '_', $inputs.R1.element_identifier)
        #set $safename_R2 = re.sub('[^\w\-_\.]', '_', $inputs.R2.element_identifier)
        ## Two datasets carrying the same name would map to the same file and
        ## the second one would silently replace the first; disambiguate them
        ## with the same _1/_2 suffixes the collection branch uses.
        #if $safename_R1 == $safename_R2:
            #set $safename_R1 = $safename_R1 + '_1'
            #set $safename_R2 = $safename_R2 + '_2'
        #end if
        ## The parameters also accept fastqsanger.gz: decompress those instead
        ## of linking a gzip stream under a plain .fastq name.
        #if $inputs.R1.is_of_type('fastqsanger.gz'):
            gunzip -c '$inputs.R1' > '${safename_R1}.fastq' &&
        #else
            ln -fs '$inputs.R1' '${safename_R1}.fastq' &&
        #end if
        #if $inputs.R2.is_of_type('fastqsanger.gz'):
            gunzip -c '$inputs.R2' > '${safename_R2}.fastq' &&
        #else
            ln -fs '$inputs.R2' '${safename_R2}.fastq' &&
        #end if
    #elif $inputs.input_type == 'collection':
        ## One forward/reverse link pair per collection element, named after
        ## the element identifier.
        #for $pair in $inputs.input_collection
            #set $safename_pair = re.sub('[^\w\-_\.]', '_', $pair.element_identifier)
            ln -fs '${pair.forward}' '${safename_pair}_1.fastq' &&
            ln -fs '${pair.reverse}' '${safename_pair}_2.fastq' &&
        #end for
    #elif $inputs.input_type == 'sequence':
        #for $input in $inputs.input_sequence
            #set $safename_seq = re.sub('[^\w\-_\.]', '_', $input.element_identifier)
            ln -fs '$input' '${safename_seq}.fasta' &&
        #end for
    #end if

    ## ------------------------------------------------------------------
    ## 2. Run fargene. Read inputs are analysed with --meta, FASTA inputs
    ##    without it.
    ## ------------------------------------------------------------------
    fargene
    --infiles
    #if $inputs.input_type in ('paired', 'collection'):
        *.fastq
        --meta
    #elif $inputs.input_type == 'sequence':
        *.fasta
    #end if
    --hmm-model '$models'
    --output fargene_output
    --tmp-dir tmp
    -p \${GALAXY_SLOTS:-4}
    ## 0.0 and 90 are the form defaults and act as "not set": the option is
    ## then omitted so that fargene applies its own default.
    #if $meta_score != 0.0:
        --meta-score '$meta_score'
    #end if
    #if $score != 0.0:
        --score '$score'
    #end if
    #if $min_orf_length != 90:
        --min-orf-length '$min_orf_length'
    #end if
    ## Boolean parameters expand to their flag (truevalue) or to nothing
    ## (falsevalue). They are deliberately left unquoted so that stray
    ## whitespace in a truevalue cannot become part of the argument.
    $protein
    $retrieve_whole
    $no_orf_predict
    $no_quality_filtering
    $no_assembly
    $orf_finder
    $store_peptides

    ## ------------------------------------------------------------------
    ## 3. Bundle the retrieved read fragments (read inputs only). The "&&"
    ##    lives inside the conditional so that the command line does not end
    ##    with a dangling operator when FASTA input is used.
    ## ------------------------------------------------------------------
    #if $inputs.input_type in ('paired', 'collection'):
        && tar -czf retrievedFragments.tar.gz fargene_output/retrievedFragments
    #end if
    ]]>    </command>
    <inputs>
        <!-- Selects how the sequence data is supplied; the command template
             branches on inputs.input_type. -->
        <conditional name="inputs">
            <param name="input_type" type="select" label="Input type" help="Select 'Paired' or 'Paired Collection' for paired-end reads, or 'Contigs/Genomes' for assembled sequences">
                <option value="paired" selected="true">Paired</option>
                <option value="collection">Paired Collection</option>
                <option value="sequence">Contigs/Genomes</option>
            </param>
            <when value="paired">
                <param name="R1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
                <param name="R2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
            </when>
            <when value="collection">
                <param name="input_collection" format="fastqsanger" type="data_collection" collection_type="list:paired" label="Paired collection"/>
            </when>
            <when value="sequence">
                <param name="input_sequence" type="data" format="fasta" multiple="true" label="Input contigs/genomes" />
            </when>
        </conditional>

        <!-- Value is passed to fargene as the argument of the hmm-model option. -->
        <param name="models" type="select" label="Resistance Genes">
            <option value="class_a">Class A beta-lactamases</option>
            <option value="class_b_1_2">Subclass B1 and B2 beta-lactamases</option>
            <option value="class_b_3">Subclass B3 beta-lactamases</option>
            <option value="class_c">Class C beta-lactamases</option>
            <option value="class_d_1">Class D beta-lactamases-1</option>
            <option value="class_d_2">Class D beta-lactamases-2</option>
            <option value="qnr">QNR</option>
        </param>
        <!-- For the three numeric parameters below the form default acts as
             "not set": the command template only passes the option when the
             value differs from the default shown here. -->
        <param argument="--score" type="float" value="0.0" label="The threshold score for a sequence to be classified as a (almost) complete gene"
            help="Leave at 0.0 to use the default of fARGene; the option is only passed when a different value is entered" />
        <param argument="--meta-score" type="float" value="0.0" label="The threshold score for a fragment to be classified as a positive. Expressed as score per amino acid"
            help="Leave at 0.0 to use the default of fARGene; the option is only passed when a different value is entered" />
        <param argument="--protein" type="boolean" truevalue="--protein" falsevalue="" checked="False" label="The input sequences are amino acid sequences" />
        <param argument="--min-orf-length" type="integer" min="1" value="90" label="The minimal length for a retrieved predicted ORF (nt)"
            help="Leave at 90 to use the default of fARGene; the option is only passed when a different value is entered" />
        <!-- truevalue must be exactly the flag: surrounding whitespace would
             become part of the argument when the value is quoted in the shell. -->
        <param argument="--retrieve-whole" type="boolean" truevalue="--retrieve-whole" falsevalue="" checked="False" label="Use this flag if the whole sequence where a hit is detected should be retrieved" />
        <param argument="--no-orf-predict" type="boolean" truevalue="--no-orf-predict" falsevalue="" checked="False" label="Do not perform ORF prediction" />
        <param argument="--no-quality-filtering" type="boolean" truevalue="--no-quality-filtering" falsevalue="" checked="False" label="Use if no quality control should be performed on the metagenomic data" />
        <param argument="--no-assembly" type="boolean" truevalue="--no-assembly" falsevalue="" checked="False" label="Use if you want to skip the assembly and retrieval of contigs for metagenomic data" />
        <param argument="--orf-finder" type="boolean" truevalue="--orf-finder" falsevalue="" checked="False" label="Use NCBI ORFfinder instead of prodigal for ORF prediction of genomes/contigs" />
        <param argument="--store-peptides" type="boolean" truevalue="--store-peptides" falsevalue="" checked="False" label="Store the translated sequences" 
            help="Useful if you plan to redo the analysis using a different model and want to skip the preprocessing steps" />

    </inputs>
    <outputs>
        <!-- Plain-text summary written by fargene into its output directory. -->
        <data name="summary" format="txt" from_work_dir="fargene_output/results_summary.txt" label="${tool.name} on ${on_string} (Summary)">
        </data>
        <!-- Archive created by the tar step of the command; that step only
             runs for read inputs, hence the matching filter. -->
        <data name="retrievedFragments" format="tar.gz" from_work_dir="retrievedFragments.tar.gz" label="${tool.name} on ${on_string} (Retrieved Fragments)">
            <filter>inputs["input_type"] in ['paired' , 'collection']</filter>
        </data>
        <data name="fargene_log" format="txt" from_work_dir="fargene_analysis.log" label="${tool.name} on ${on_string} (log)">
        </data>
        <!-- The dot before the extension is escaped so that it matches a
             literal "." rather than any character. -->
        <collection name="hmmsearchresults" type="list" label="${tool.name} on ${on_string} (HMM Search Result)">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.out" directory="fargene_output/hmmsearchresults" format="txt" visible="false" />
        </collection>
        <collection name="predictedGenes" type="list" label="${tool.name} on ${on_string} (Predicted Genes)">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fasta" directory="fargene_output/predictedGenes" format="fasta" visible="false" />
        </collection>
    </outputs>
    <tests>
        <!-- Paired-end reads supplied as two separate datasets. -->
        <test expect_num_outputs="5">
            <conditional name="inputs">
                <param name="input_type" value="paired"/>
                <param name="R1" value="reads_1.fastq" ftype="fastqsanger"/>
                <param name="R2" value="reads_2.fastq" ftype="fastqsanger"/>
            </conditional>
            <output name="summary" file="paired/results_summary.txt" compare="sim_size"/>
        </test>
        <!-- The same reads supplied as a list:paired collection. -->
        <test expect_num_outputs="5">
            <conditional name="inputs">
                <param name="input_type" value="collection"/>
                <param name="input_collection">
                    <collection type="list:paired">
                        <element name="Pair1">
                            <collection type="paired">
                                <element name="forward" value="reads_1.fastq" ftype="fastqsanger"/>
                                <element name="reverse" value="reads_2.fastq" ftype="fastqsanger"/>
                            </collection>
                        </element>
                    </collection>
                </param>
            </conditional>
            <output name="summary" file="paired/results_summary.txt" compare="sim_size"/>
        </test>
        <!-- Assembled sequence input; no fragment archive is produced, hence
             four outputs. "models" is a top-level parameter and therefore
             sits outside the "inputs" conditional. -->
        <test expect_num_outputs="4">
            <conditional name="inputs">
                <param name="input_type" value="sequence"/>
                <param name="input_sequence" value="klebsiella_plasmid.fasta" ftype="fasta"/>
            </conditional>
            <param name="models" value="class_b_1_2" />
            <output name="summary" file="contigs/results_summary.txt" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

    fARGene (Fragmented Antibiotic Resistance Gene idENtifiEr) is a tool that takes either fragmented metagenomic data or longer sequences as input and predicts and delivers full-length antibiotic resistance genes as output. The tool includes developed and optimized models for a number of resistance gene types, and the functionality to create and optimize models of your own choice of resistance genes.

    The current version of the tool includes developed and optimized models for identification of the following resistance genes:

    - Class A beta-lactamases
    - Subclass B1 and B2 beta-lactamases
    - Subclass B3 beta-lactamases
    - Class C beta-lactamases
    - Class D beta-lactamases
    - qnr
    ]]>
    </help>
    <expand macro="citations" />
</tool>