Skip to content

Commit 15aa959

Browse files
committed
Add RGI and RGI bwt
1 parent 5997ce6 commit 15aa959

File tree

6 files changed

+67
-39
lines changed

6 files changed

+67
-39
lines changed

config/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ params:
1919
read_length: 150 # readlengh to use for indexing the preclustered db
2020
window: 20 # window size to allow min and max read length (min-len = read_length - window)
2121
rgi:
22-
db_version: "3.1.1"
22+
#empty, no options exposed
2323
srax:
2424
dbtype: "basic"
2525
amrplusplus:

envs/mykrobe.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name: mykrobe
22
channels:
3-
- defaults
43
- conda-forge
54
- bioconda
5+
- defaults
66
dependencies:
77
- mykrobe=0.9.0

envs/rgi.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- rgi=5.1.1
7+
- rgi=6.0.3

rules/rgi.smk

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
rule get_rgi_db:
22
output:
3-
card_db = os.path.join(config["params"]["db_dir"], "card", "card.json")
3+
card_db = os.path.join(config["params"]["db_dir"], "card", "card.json")
44
params:
5-
db_version = config['params']['rgi']['db_version'],
65
db_dir = os.path.join(config["params"]["db_dir"], "card")
76
log:
87
"logs/rgi_db.log"
98
shell:
10-
"""
9+
"""{{
1110
mkdir -p {params.db_dir}
12-
curl https://card.mcmaster.ca/latest/data --output {params.db_dir}/card.tar.bz2
11+
wget -c -q -O {params.db_dir}/card.tar.bz2 'https://card.mcmaster.ca/latest/data'
1312
tar -C {params.db_dir} -xvf {params.db_dir}/card.tar.bz2
13+
rm -f {params.db_dir}/card.tar.bz2
14+
}} >{log} 2>&1
1415
"""
1516

1617
rule run_rgi:
@@ -22,21 +23,29 @@ rule run_rgi:
2223
metadata = "results/{sample}/rgi/metadata.txt"
2324
message: "Running rule run_rgi on {wildcards.sample} with contigs"
2425
log:
25-
"logs/rgi_{sample}.log"
26+
"logs/rgi_{sample}.log"
2627
conda:
27-
"../envs/rgi.yaml"
28+
"../envs/rgi.yaml"
2829
threads:
29-
config["params"]["threads"]
30+
config["params"]["threads"]
3031
params:
31-
output_prefix = "results/{sample}/rgi/rgi"
32+
out_dir = "results/{sample}/rgi"
3233
shell:
33-
"""
34-
rgi load --card_json {input.card_db} > {log} 2>&1
35-
rgi main --input_sequence {input.contigs} --output_file {params.output_prefix} --clean --num_threads {threads} >>{log} 2>&1
36-
37-
echo "--analysis_software_version $(rgi main --version)" > {output.metadata}
38-
echo "--reference_database_version $(rgi database --version)" >> {output.metadata}
39-
"""
34+
"""{{
35+
# Inconveniently we need to cd to the output directory because 'rgi load' writes
36+
# its database where it runs, and we don't want two jobs writing in one location.
37+
# Before we change directory we need to make all file paths absolute.
38+
FNA="$(realpath '{input.contigs}')"
39+
CARD="$(realpath '{input.card_db}')"
40+
META="$(realpath '{output.metadata}')"
41+
mkdir -p {params.out_dir}
42+
cd {params.out_dir}
43+
rgi load -i "$CARD" --local
44+
rgi main --local --clean --input_sequence "$FNA" --output_file rgi --num_threads {threads}
45+
# We extract the database version from the JSON, as 'rgi database -v' gives "N/A"
46+
echo "--analysis_software_version $(rgi main --version) --reference_database_version $(jq -r '._version' "$CARD")" >"$META"
47+
}} >{log} 2>&1
48+
"""
4049

4150
rule hamronize_rgi:
4251
input:
@@ -49,5 +58,5 @@ rule hamronize_rgi:
4958
"../envs/hamronization.yaml"
5059
shell:
5160
"""
52-
hamronize rgi $(paste - - < {input.metadata}) --input_file_name {input.contigs} {input.report} > {output}
61+
hamronize rgi $(cat {input.metadata}) --input_file_name {input.contigs} {input.report} > {output}
5362
"""

rules/rgi_bwt.smk

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,64 @@
11
rule get_rgi_bwt_db:
22
output:
3-
card_db_bwt = os.path.join(config["params"]["db_dir"], "card_bwt", "card.json")
3+
card_db_bwt = os.path.join(config["params"]["db_dir"], "card_bwt", "card.json")
44
params:
5-
db_version = config['params']['rgi']['db_version'],
65
db_dir = os.path.join(config["params"]["db_dir"], "card_bwt")
76
log:
8-
"logs/rgi_db.log"
7+
"logs/rgi_bwt_db.log"
98
shell:
10-
"""
9+
"""{{
1110
mkdir -p {params.db_dir}
12-
curl https://card.mcmaster.ca/latest/data --output {params.db_dir}/card.tar.bz2
11+
wget -c -q -O {params.db_dir}/card.tar.bz2 'https://card.mcmaster.ca/latest/data'
1312
tar -C {params.db_dir} -xvf {params.db_dir}/card.tar.bz2
13+
rm -f {params.db_dir}/card.tar.bz2
14+
}} >{log} 2>&1
1415
"""
1516

1617
rule run_rgi_bwt:
1718
input:
1819
read1 = get_read1,
1920
read2 = get_read2,
20-
card_db_bwt = os.path.join(config["params"]["db_dir"], "card_bwt", "card.json")
21+
card_db = os.path.join(config["params"]["db_dir"], "card_bwt", "card.json")
2122
output:
2223
report = "results/{sample}/rgibwt/rgibwt.gene_mapping_data.txt",
2324
metadata = "results/{sample}/rgibwt/metadata.txt"
2425
message: "Running rule run_rgi_bwt on {wildcards.sample} with reads"
2526
log:
26-
"logs/rgi_bwt_{sample}.log"
27+
"logs/rgi_bwt_{sample}.log"
2728
conda:
28-
"../envs/rgi.yaml"
29+
"../envs/rgi.yaml"
2930
threads:
30-
config["params"]["threads"]
31+
config["params"]["threads"]
3132
params:
32-
output_prefix = "results/{sample}/rgibwt/rgibwt"
33+
out_dir = "results/{sample}/rgibwt"
3334
shell:
34-
"""
35-
rgi card_annotation --input {input.card_db_bwt} > {log} 2>&1
36-
rgi load --card_json {input.card_db_bwt} --card_annotation card_database_v*.fasta >> {log} 2>&1
37-
rm card_database_v*.fasta
38-
rgi bwt --read_one {input.read1} --read_two {input.read2} --output_file {params.output_prefix} --aligner bwa --threads {threads} >>{log} 2>&1
35+
"""{{
36+
# We need to change directory to the output directory because we can't
37+
# control where rgi writes its annotations or "loads" its database;
38+
# and so before this we need to make all paths we use relative to PWD
39+
FQ1="$(realpath '{input.read1}')"
40+
FQ2="$(realpath '{input.read2}')"
41+
CARD="$(realpath '{input.card_db}')"
42+
META="$(realpath '{output.metadata}')"
43+
mkdir -p {params.out_dir}
44+
cd {params.out_dir}
45+
46+
# Figure out the database version as 'rgi database -v' gives "NA"
47+
DB_VER="$(jq -r '._version' "$CARD")"
3948
40-
echo "--analysis_software_version $(rgi main --version)" > {output.metadata}
41-
echo "--reference_database_version $(rgi database --version)" >> {output.metadata}
42-
"""
49+
# Create the annotation files (will be written in PWD)
50+
rgi card_annotation --input "$CARD"
51+
F1="card_database_v${{DB_VER}}.fasta"
52+
F2="card_database_v${{DB_VER}}_all.fasta"
53+
54+
# Now "load" (= create) the database locally and run the tool
55+
rgi load --local -i "$CARD" --card_annotation "$F1" --card_annotation_all_models "$F2"
56+
rm -f "$F1" "$F2"
57+
rgi bwt --local --clean --read_one "$FQ1" --read_two "$FQ2" --output_file "rgibwt" --threads {threads}
58+
59+
echo "--analysis_software_version $(rgi main --version) --reference_database_version $DB_VER" >"$META"
60+
}} >{log} 2>&1
61+
"""
4362

4463
rule hamronize_rgi_bwt:
4564
input:
@@ -52,5 +71,5 @@ rule hamronize_rgi_bwt:
5271
"../envs/hamronization.yaml"
5372
shell:
5473
"""
55-
hamronize rgi $(paste - - < {input.metadata}) --input_file_name {input.read1} {input.report} > {output}
74+
hamronize rgi $(cat {input.metadata}) --input_file_name {input.read1} {input.report} > {output}
5675
"""

test/test_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ params:
1717
read_length: 250 # readlengh to use for indexing the preclustered db
1818
window: 20 # window size to allow min and max read length (min-len = read_length - window)
1919
rgi:
20-
db_version: "3.1.1"
20+
#empty, no options exposed
2121
srax:
2222
dbtype: "basic"
2323
amrplusplus:

0 commit comments

Comments
 (0)