-
Notifications
You must be signed in to change notification settings - Fork 437
/
fargene.xml
185 lines (179 loc) · 9.48 KB
/
fargene.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
<tool id="fargene" name="fargene" version="@VERSION@+galaxy1">
<!-- One-line summary of what the wrapped program does. -->
<description>Fragmented antibiotic resistance gene identifier </description>
<!-- Shared definitions live in macros.xml (not visible here). The @VERSION@ token used in
     the tool version and the "requirements" / "citations" macros expanded in this file
     are presumably defined there; confirm when editing either file. -->
<macros>
<import>macros.xml</import>
</macros>
<!-- Cross-reference to the bio.tools registry entry for this software. -->
<xrefs>
<xref type="bio.tools">fargene</xref>
</xrefs>
<expand macro="requirements" />
<!-- Command used to report the version of the underlying fargene executable. -->
<version_command>fargene --version</version_command>
<command detect_errors="exit_code"><![CDATA[
    ## Stage every input in the working directory under a sanitised file name so
    ## that fargene can be handed a simple shell glob (*.fastq or *.fasta).
    ## Characters outside [A-Za-z0-9_.-] in the element identifier become "_".
    #import re
    #if $inputs.input_type == 'paired':
        ## NOTE(review): the link names are derived from the two dataset names only;
        ## if R1 and R2 share the same identifier the second link replaces the first.
        #set $safename_R1 = re.sub('[^\w\-_\.]', '_', $inputs.R1.element_identifier)
        #set $safename_R2 = re.sub('[^\w\-_\.]', '_', $inputs.R2.element_identifier)
        ln -fs '$inputs.R1' '${safename_R1}.fastq' &&
        ln -fs '$inputs.R2' '${safename_R2}.fastq' &&
    #elif $inputs.input_type == 'collection':
        #for $input in $inputs.input_collection
            ## Both mates share the identifier of their pair; _1 / _2 tells them apart.
            #set $safename_pair = re.sub('[^\w\-_\.]', '_', $input.element_identifier)
            ln -fs '${input.forward}' '${safename_pair}_1.fastq' &&
            ln -fs '${input.reverse}' '${safename_pair}_2.fastq' &&
        #end for
    #elif $inputs.input_type == 'sequence':
        #for $input in $inputs.input_sequence
            #set $safename_seq = re.sub('[^\w\-_\.]', '_', $input.element_identifier)
            ln -fs '$input' '${safename_seq}.fasta' &&
        #end for
    #end if

    fargene
        --infiles
        #if $inputs.input_type in ('paired', 'collection'):
            ## Read input: run in metagenomic mode.
            *.fastq
            --meta
        #elif $inputs.input_type == 'sequence':
            *.fasta
        #end if
        --hmm-model '$models'
        --output fargene_output
        --tmp-dir tmp
        -p \${GALAXY_SLOTS:-4}
        ## Numeric options are only passed when they differ from the form defaults.
        #if $meta_score != 0.0:
            --meta-score '$meta_score'
        #end if
        #if $score != 0.0:
            --score '$score'
        #end if
        #if $min_orf_length != 90:
            --min-orf-length '$min_orf_length'
        #end if
        ## Boolean parameters expand to their truevalue (the flag itself) when checked.
        #if $protein:
            '$protein'
        #end if
        #if $retrieve_whole:
            '$retrieve_whole'
        #end if
        #if $no_orf_predict:
            '$no_orf_predict'
        #end if
        #if $no_quality_filtering:
            '$no_quality_filtering'
        #end if
        #if $no_assembly:
            '$no_assembly'
        #end if
        #if $orf_finder:
            '$orf_finder'
        #end if
        #if $store_peptides:
            '$store_peptides'
        #end if

    ## The retrieved-fragments archive is only produced for read input. The chaining
    ## operator lives inside the conditional so that contig/genome runs do not end
    ## in a dangling "&&" followed by a bare redirection.
    #if $inputs.input_type in ('paired', 'collection'):
        && tar -czf retrievedFragments.tar.gz fargene_output/retrievedFragments 2>&1
    #end if
]]></command>
<inputs>
    <!-- Input mode: selects which staging branch and which file glob the command template uses. -->
    <conditional name="inputs">
        <param name="input_type" type="select" label="Input type" help="Select 'paired end' reads or 'sequence' for genomes/contigs">
            <option value="paired" selected="true">Paired</option>
            <option value="collection">Paired Collection</option>
            <option value="sequence">Contigs/Genomes</option>
        </param>
        <when value="paired">
            <param name="R1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
            <param name="R2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
        </when>
        <when value="collection">
            <!-- NOTE(review): only fastqsanger is accepted here, while the single-pair branch
                 also accepts fastqsanger.gz; confirm whether the difference is intended. -->
            <param name="input_collection" format="fastqsanger" type="data_collection" collection_type="list:paired" label="Paired collection"/>
        </when>
        <when value="sequence">
            <param name="input_sequence" type="data" format="fasta" multiple="true" label="Input contigs/genomes" />
        </when>
    </conditional>

    <!-- The selected value is handed to fargene as the HMM model name. -->
    <param name="models" type="select" label="Resistance Genes">
        <option value="class_a">Class A beta-lactamases</option>
        <option value="class_b_1_2">Subclass B1 and B2 beta-lactamases</option>
        <option value="class_b_3">Subclass B3 beta-lactamases</option>
        <option value="class_c">Class C beta-lactamases</option>
        <option value="class_d_1">Class D beta-lactamases-1</option>
        <option value="class_d_2">Class D beta-lactamases-2</option>
        <option value="qnr">QNR</option>
    </param>

    <!-- Numeric options: the command template only passes them when they differ from the
         defaults declared here (0.0, 0.0 and 90). -->
    <param argument="--score" type="float" value="0.0" label="The threshold score for a sequence to be classified as an (almost) complete gene" />
    <param argument="--meta-score" type="float" value="0.0" label="The threshold score for a fragment to be classified as a positive. Expressed as score per amino acid" />
    <param argument="--min-orf-length" type="integer" min="1" value="90" label="The minimal length for a retrieved predicted ORF (nt)" />

    <!-- Flags: the command template emits each truevalue inside single quotes, so a
         truevalue must be exactly the flag, with no leading or trailing whitespace. -->
    <param argument="--protein" type="boolean" truevalue="--protein" falsevalue="" checked="false" label="Input sequences are amino acid (protein) sequences" />
    <param argument="--retrieve-whole" type="boolean" truevalue="--retrieve-whole" falsevalue="" checked="false" label="Use this flag if the whole sequence where a hit is detected should be retrieved" />
    <param argument="--no-orf-predict" type="boolean" truevalue="--no-orf-predict" falsevalue="" checked="false" label="Do not perform ORF prediction" />
    <param argument="--no-quality-filtering" type="boolean" truevalue="--no-quality-filtering" falsevalue="" checked="false" label="Use if no quality control should be performed on the metagenomic data" />
    <param argument="--no-assembly" type="boolean" truevalue="--no-assembly" falsevalue="" checked="false" label="Use if you want to skip the assembly and retrieval of contigs for metagenomic data" />
    <param argument="--orf-finder" type="boolean" truevalue="--orf-finder" falsevalue="" checked="false" label="Use NCBI ORFfinder instead of prodigal for ORF prediction of genomes/contigs" />
    <param argument="--store-peptides" type="boolean" truevalue="--store-peptides" falsevalue="" checked="false" label="Store the translated sequences"
           help="Useful if you plan to redo the analysis using a different model and want to skip the preprocessing steps" />
</inputs>
<outputs>
    <!-- Plain-text summary written by fargene into its output directory. -->
    <data name="summary" format="txt" from_work_dir="fargene_output/results_summary.txt" label="${tool.name} on ${on_string} (Summary)"/>
    <!-- Archive built by the command template; it is only created for read input,
         hence the matching filter on the input type. -->
    <data name="retrievedFragments" format="tar.gz" from_work_dir="retrievedFragments.tar.gz" label="${tool.name} on ${on_string} (Retrieved Fragments)">
        <filter>inputs["input_type"] in ['paired' , 'collection']</filter>
    </data>
    <!-- Log file picked up from the job working directory. -->
    <data name="fargene_log" format="txt" from_work_dir="fargene_analysis.log" label="${tool.name} on ${on_string} (log)"/>
    <!-- Discovered collections. The angle brackets of the named regex group are written
         as entities because a literal less-than sign is not allowed inside an XML
         attribute value, and the extension dot is escaped so it only matches a real dot. -->
    <collection name="hmmsearchresults" type="list" label="HMM Search Result">
        <discover_datasets pattern="(?P&lt;name&gt;.+)\.out" directory="fargene_output/hmmsearchresults" format="txt" visible="false" />
    </collection>
    <collection name="predictedGenes" type="list" label="Predicted Genes">
        <discover_datasets pattern="(?P&lt;name&gt;.+)\.fasta" directory="fargene_output/predictedGenes" format="fasta" visible="false" />
    </collection>
</outputs>
<tests>
    <!-- Single read pair, default model. Five outputs: summary, fragment archive, log
         and the two discovered collections. -->
    <test expect_num_outputs="5">
        <conditional name="inputs">
            <param name="input_type" value="paired"/>
            <param name="R1" value="reads_1.fastq"/>
            <param name="R2" value="reads_2.fastq"/>
        </conditional>
        <output name="summary" file="paired/results_summary.txt" compare="sim_size"/>
    </test>
    <!-- Same reads supplied as a list:paired collection. -->
    <test expect_num_outputs="5">
        <conditional name="inputs">
            <param name="input_type" value="collection"/>
            <param name="input_collection">
                <collection type="list:paired">
                    <element name="Pair1">
                        <collection type="paired">
                            <element name="forward" value="reads_1.fastq" ftype="fastqsanger"/>
                            <element name="reverse" value="reads_2.fastq" ftype="fastqsanger"/>
                        </collection>
                    </element>
                </collection>
            </param>
        </conditional>
        <output name="summary" file="paired/results_summary.txt" compare="sim_size"/>
    </test>
    <!-- Contig/genome input: the fragment archive is filtered out, leaving four outputs.
         "models" is a top-level parameter of the tool, so it is set at test level and
         not inside the "inputs" conditional. -->
    <test expect_num_outputs="4">
        <conditional name="inputs">
            <param name="input_type" value="sequence"/>
            <param name="input_sequence" value="klebsiella_plasmid.fasta"/>
        </conditional>
        <param name="models" value="class_b_1_2" />
        <output name="summary" file="contigs/results_summary.txt" compare="sim_size"/>
    </test>
</tests>
<help><![CDATA[
fARGene (Fragmented Antibiotic Resistance Gene idENtifiEr) is a tool that takes either fragmented metagenomic data or longer sequences as input and predicts and delivers full-length antibiotic resistance genes as output. The tool includes developed and optimized models for a number of resistance gene types, and the functionality to create and optimize models of your own choice of resistance genes.
The current version of the tool includes developed and optimized models for identification of the following resistance genes:
- Class A beta-lactamases
- Subclass B1 and B2 beta-lactamases
- Subclass B3 beta-lactamases
- Class C beta-lactamases
- Class D beta-lactamases
- qnr
]]>
</help>
<expand macro="citations" />
</tool>