From 179646a5d57ebae30fe99302128dc400bf391dfc Mon Sep 17 00:00:00 2001
From: Francisco Zorrilla
Date: Fri, 9 Jun 2023 16:40:12 +0100
Subject: [PATCH 1/3] feature: add Snakefile rule for eggnog-mapper

create Snakefile rule to annotate genomes with eggnog-mapper
---
Review notes (ignored by git am, not part of the commit message):
 * The benchmark path no longer carries the IDs wildcard: the rule's output
   has no wildcards, and Snakemake requires output and benchmark files of a
   rule to contain the same wildcards.
 * The bash parameter expansion is written with doubled braces so that
   Snakemake's brace formatting of the shell string leaves a literal
   single-brace expansion for bash instead of treating it as a format field.
   The same spelling is used in patches 2/3 and 3/3 so the series applies.
 * Line breaks and four-space indentation were reconstructed from a
   whitespace-collapsed copy of this series; verify context lines against
   the target Snakefile before applying.

 workflow/Snakefile | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 22a9e06..9ca4add 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1021,6 +1021,34 @@ rule binningVis:
         rm Rplots.pdf
         """
 
+rule eggnog_mapper:
+    input:
+        f'{config["path"]["root"]}/final_mags/'
+    output:
+        directory(f'{config["path"]["root"]}/emapper/')
+    benchmark:
+        f'{config["path"]["root"]}/{config["folder"]["benchmarks"]}/eggnog_mapper.benchmark.txt'
+    message:
+        """
+        batch process small number of genomes with eggnog mapper, make sure you have downloaded and
+        configured the eggnog database path, assumes that you have ORF annotated protein fasta files
+        (e.g. translated from DNA using prodigal) in final_mags folder, then will loop through each
+        faa file and run eggnog to produce gene annotations files, Warning: this can take a few
+        minutes and load up to 45GB to RAM
+        """
+
+    shell:
+        """
+        # Activate eggnog environment
+        set +u;source activate eggnog6;set -u;
+
+        cd {input}
+
+        while read protein;do
+            emapper.py -m diamond -i $protein -o ${{protein%.*}} --cpu 18 --dbmem --excel --scratch_dir .
+        done< <(ls|grep faa)
+        """
+
 rule abundance:
     input:
         bins = f'{config["path"]["root"]}/{config["folder"]["reassembled"]}/{{IDs}}/reassembled_bins',

From ab7e8f66ff5ff7df8ea8c29b22a553a9dd146965 Mon Sep 17 00:00:00 2001
From: Francisco Zorrilla
Date: Fri, 9 Jun 2023 16:48:24 +0100
Subject: [PATCH 2/3] Update Snakefile

add line to create dummy output folder, still need to replace hardcoded cores number with new param, same with eggnog env
---
 workflow/Snakefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 9ca4add..42c87f6 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1043,6 +1043,7 @@ rule eggnog_mapper:
         set +u;source activate eggnog6;set -u;
 
         cd {input}
+        mkdir -p {output}
 
         while read protein;do
             emapper.py -m diamond -i $protein -o ${{protein%.*}} --cpu 18 --dbmem --excel --scratch_dir .

From 725928f14fec20484e0bbb951ae7f0adf00fe6ef Mon Sep 17 00:00:00 2001
From: Francisco Zorrilla
Date: Fri, 9 Jun 2023 17:18:46 +0100
Subject: [PATCH 3/3] Update Snakefile eggnog_mapper rule

---
 workflow/Snakefile | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/workflow/Snakefile b/workflow/Snakefile
index 42c87f6..e36c80b 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -1044,9 +1044,12 @@ rule eggnog_mapper:
 
         cd {input}
         mkdir -p {output}
-
-        while read protein;do
-            emapper.py -m diamond -i $protein -o ${{protein%.*}} --cpu 18 --dbmem --excel --scratch_dir .
+
+        while read protein;do
+            mkdir -p tmp;
+            emapper.py -m diamond -i $protein -o ${{protein%.*}} --cpu 18 --dbmem --excel --scratch_dir tmp;
+            mv *.emapper.* {output}
+            rm -r tmp;
         done< <(ls|grep faa)
         """
 