-
Notifications
You must be signed in to change notification settings - Fork 3
SnakeMake
Pierre Lindenbaum edited this page Jul 29, 2015
·
1 revision
Convert a Makefile to a Snakemake workflow (Snakemake: https://bitbucket.org/johanneskoester/snakemake/wiki/Home). The example Makefile, test03.mk:
# Download a set of NCBI protein records (by GI number) as FASTA, concatenate
# them into all.fa, and extract the single longest sequence into longest.fa.
#
# NOTE(review): $(description TARGET,TEXT) is not a stock GNU Make function.
# It is presumably provided by the patched make (xml-make) this file is run
# with, where TEXT labels the target in the XML dump -- confirm against the tool.

.PHONY: all all_fasta clean

# Delete a target whose recipe failed, so a truncated download or a partial
# pipeline result is never mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:

# GI numbers of the records to fetch; each one produces <gi>.fa.
GILIST=52854274 156118490 290782623 209485592 149126991 254749437 269857780 14971105 256041807 269857713

# Fetch one record. The target name without its .fa suffix is used as the id.
%.fa:
	$(description $@,download gi:$(basename $@) from NCBI as fasta)wget -O "$@" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=$(basename $@)&retmode=text&rettype=fasta"

# Top-level goal: build everything, then report completion.
all: all_fasta
	echo "Done"

all_fasta: longest.fa

# Linearize all.fa to one record per line (header TAB sequence), prefix each
# line with the sequence length, keep the line that sorts last numerically,
# drop the length column and split the record back into header/sequence lines.
# $$ passes a literal $ through make to awk.
longest.fa : all.fa
	$(description $@,get the longest sequence in $<)awk '/^>/ { printf("%s%s\t",(NR==1?"":"\n"),$$0);next;} { printf("%s",$$0);} END {printf("\n");}' $< |\
	awk -F '\t' '{printf("%d\t%s\n",length($$2),$$0);}' | sort -t ' ' -k1,1n | tail -n1 | cut -f 2- |\
	tr "\t" "\n" > $@

# Concatenate every downloaded record, in GILIST order.
all.fa : $(addsuffix .fa,${GILIST})
	$(description $@,concatenate everything)cat $^ > $@

# Remove every file this Makefile generates: the downloads, all.fa and longest.fa.
clean:
	rm -f $(addsuffix .fa,${GILIST}) all.fa longest.fa
$ ../make-4.1/bin/xml-make4.1 --xml test03.xml -f test03.mk all
content of test03.xml:
<?xml version="1.0" encoding="UTF-8"?>
<make shell="/bin/sh" shellflags="-c" path="/home/lindenb/package/jdk1.8.0_40/bin:/home/lindenb/package/eclipse:/home/lindenb/package/jdk1.8.0_40/bin:/home/lindenb/package/eclipse:/home/lindenb/package/firefox:/home/lindenb/bin:/usr/lib/lightdm/lightdm:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/home/lindenb/package/edirect" pwd="/home/lindenb/src/xml-patch-make/tests">
<target name="52854274.fa" description="download gi:52854274 from NCBI as fasta" id="1" precious="0" phony="0">
<statements>
<statement>wget -O "52854274.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=52854274&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="156118490.fa" description="download gi:156118490 from NCBI as fasta" id="2" precious="0" phony="0">
<statements>
<statement>wget -O "156118490.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=156118490&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="290782623.fa" description="download gi:290782623 from NCBI as fasta" id="3" precious="0" phony="0">
<statements>
<statement>wget -O "290782623.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=290782623&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="209485592.fa" description="download gi:209485592 from NCBI as fasta" id="4" precious="0" phony="0">
<statements>
<statement>wget -O "209485592.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=209485592&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="149126991.fa" description="download gi:149126991 from NCBI as fasta" id="5" precious="0" phony="0">
<statements>
<statement>wget -O "149126991.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=149126991&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="254749437.fa" description="download gi:254749437 from NCBI as fasta" id="6" precious="0" phony="0">
<statements>
<statement>wget -O "254749437.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=254749437&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="269857780.fa" description="download gi:269857780 from NCBI as fasta" id="7" precious="0" phony="0">
<statements>
<statement>wget -O "269857780.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=269857780&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="14971105.fa" description="download gi:14971105 from NCBI as fasta" id="8" precious="0" phony="0">
<statements>
<statement>wget -O "14971105.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=14971105&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="256041807.fa" description="download gi:256041807 from NCBI as fasta" id="9" precious="0" phony="0">
<statements>
<statement>wget -O "256041807.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=256041807&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="269857713.fa" description="download gi:269857713 from NCBI as fasta" id="10" precious="0" phony="0">
<statements>
<statement>wget -O "269857713.fa" "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&amp;id=269857713&amp;retmode=text&amp;rettype=fasta"</statement>
</statements>
</target>
<target name="all.fa" description="concatenate everything" id="11" precious="0" phony="0">
<prerequisites>
<prerequisite name="52854274.fa" ref="1"/>
<prerequisite name="156118490.fa" ref="2"/>
<prerequisite name="290782623.fa" ref="3"/>
<prerequisite name="209485592.fa" ref="4"/>
<prerequisite name="149126991.fa" ref="5"/>
<prerequisite name="254749437.fa" ref="6"/>
<prerequisite name="269857780.fa" ref="7"/>
<prerequisite name="14971105.fa" ref="8"/>
<prerequisite name="256041807.fa" ref="9"/>
<prerequisite name="269857713.fa" ref="10"/>
</prerequisites>
<statements>
<statement>cat 52854274.fa 156118490.fa 290782623.fa 209485592.fa 149126991.fa 254749437.fa 269857780.fa 14971105.fa 256041807.fa 269857713.fa > all.fa</statement>
<statement/>
</statements>
</target>
<target name="longest.fa" description="get the longest sequence in all.fa" id="12" precious="0" phony="0">
<prerequisites>
<prerequisite name="all.fa" ref="11"/>
</prerequisites>
<statements>
<statement>awk '/^>/ { printf("%s%s\t",(NR==1?"":"\n"),$0);next;} { printf("%s",$0);} END {printf("\n");}' all.fa |\
awk -F '\t' '{printf("%d\t%s\n",length($2),$0);}' | sort -t ' ' -k1,1n | tail -n1 | cut -f 2- |\
tr "\t" "\n" > longest.fa</statement>
</statements>
</target>
<target name="all_fasta" description="all_fasta" id="13" precious="0" phony="1">
<prerequisites>
<prerequisite name="longest.fa" ref="12"/>
</prerequisites>
</target>
<target name="all" description="all" id="14" precious="0" phony="1">
<prerequisites>
<prerequisite name="all_fasta" ref="13"/>
</prerequisites>
<statements>
<statement>echo "Done"</statement>
</statements>
</target>
</make>
$ xsltproc ../stylesheets/graph2snake.xsl test03.xml > Snakefile
content of the Snakefile:
shell.executable("/bin/sh")
rule rule14:
"""all"""
output: '__14_phony.flag'
input: '__13_phony.flag'
shell:
"touch '__14_phony.flag';" \
"echo \"Done\""
rule rule13:
"""all_fasta"""
output: '__13_phony.flag'
input: 'longest.fa'
shell:
"touch '__13_phony.flag';" \
"echo '__13_phony.flag'"
rule rule12:
"""get the longest sequence in all.fa"""
output: 'longest.fa'
input: 'all.fa'
shell: "awk '/^>/ {{ printf(\"%s%s\\t\",(NR==1?\"\":\"\\n\"),$0);next;}} {{ printf(\"%s\",$0);}} END {{printf(\"\\n\");}}' all.fa | awk -F '\\t' '{{printf(\"%d\\t%s\\n\",length($2),$0);}}' | sort -t ' ' -k1,1n | tail -n1 | cut -f 2- | tr \"\\t\" \"\\n\" > longest.fa"
rule rule11:
"""concatenate everything"""
output: 'all.fa'
input: '52854274.fa' , '156118490.fa' , '290782623.fa' , '209485592.fa' , '149126991.fa' , '254749437.fa' , '269857780.fa' , '14971105.fa' , '256041807.fa' , '269857713.fa'
shell: "cat 52854274.fa 156118490.fa 290782623.fa 209485592.fa 149126991.fa 254749437.fa 269857780.fa 14971105.fa 256041807.fa 269857713.fa > all.fa ; " \
""
rule rule10:
"""download gi:269857713 from NCBI as fasta"""
output: '269857713.fa'
shell: "wget -O \"269857713.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=269857713&retmode=text&rettype=fasta\""
rule rule9:
"""download gi:256041807 from NCBI as fasta"""
output: '256041807.fa'
shell: "wget -O \"256041807.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=256041807&retmode=text&rettype=fasta\""
rule rule8:
"""download gi:14971105 from NCBI as fasta"""
output: '14971105.fa'
shell: "wget -O \"14971105.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=14971105&retmode=text&rettype=fasta\""
rule rule7:
"""download gi:269857780 from NCBI as fasta"""
output: '269857780.fa'
shell: "wget -O \"269857780.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=269857780&retmode=text&rettype=fasta\""
rule rule6:
"""download gi:254749437 from NCBI as fasta"""
output: '254749437.fa'
shell: "wget -O \"254749437.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=254749437&retmode=text&rettype=fasta\""
rule rule5:
"""download gi:149126991 from NCBI as fasta"""
output: '149126991.fa'
shell: "wget -O \"149126991.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=149126991&retmode=text&rettype=fasta\""
rule rule4:
"""download gi:209485592 from NCBI as fasta"""
output: '209485592.fa'
shell: "wget -O \"209485592.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=209485592&retmode=text&rettype=fasta\""
rule rule3:
"""download gi:290782623 from NCBI as fasta"""
output: '290782623.fa'
shell: "wget -O \"290782623.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=290782623&retmode=text&rettype=fasta\""
rule rule2:
"""download gi:156118490 from NCBI as fasta"""
output: '156118490.fa'
shell: "wget -O \"156118490.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=156118490&retmode=text&rettype=fasta\""
rule rule1:
"""download gi:52854274 from NCBI as fasta"""
output: '52854274.fa'
shell: "wget -O \"52854274.fa\" \"http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=52854274&retmode=text&rettype=fasta\""
Invoke Snakemake in the directory containing the Snakefile:
lindenb@hardyweinberg:~/src/xml-patch-make/tests$ ../snakemake/bin/snakemake
Provided cores: 1
Rules claiming more threads will be scaled down.
Job counts:
count jobs
1 rule1
1 rule10
1 rule11
1 rule12
1 rule13
1 rule14
1 rule2
1 rule3
1 rule4
1 rule5
1 rule6
1 rule7
1 rule8
1 rule9
14
rule rule1:
output: 52854274.fa
--2015-07-29 17:25:01-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=52854274&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `52854274.fa'
[ <=> ] 325 --.-K/s in 0s
2015-07-29 17:25:01 (18.3 MB/s) - `52854274.fa' saved [325]
1 of 14 steps (7%) done
rule rule3:
output: 290782623.fa
--2015-07-29 17:25:01-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=290782623&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `290782623.fa'
[ <=> ] 370 --.-K/s in 0s
2015-07-29 17:25:01 (20.1 MB/s) - `290782623.fa' saved [370]
2 of 14 steps (14%) done
rule rule5:
output: 149126991.fa
--2015-07-29 17:25:01-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=149126991&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `149126991.fa'
[ <=> ] 384 --.-K/s in 0s
2015-07-29 17:25:02 (22.7 MB/s) - `149126991.fa' saved [384]
3 of 14 steps (21%) done
rule rule4:
output: 209485592.fa
--2015-07-29 17:25:02-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=209485592&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `209485592.fa'
[ <=> ] 384 --.-K/s in 0s
2015-07-29 17:25:02 (22.9 MB/s) - `209485592.fa' saved [384]
4 of 14 steps (29%) done
rule rule9:
output: 256041807.fa
--2015-07-29 17:25:02-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=256041807&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `256041807.fa'
[ <=> ] 400 --.-K/s in 0s
2015-07-29 17:25:03 (23.1 MB/s) - `256041807.fa' saved [400]
5 of 14 steps (36%) done
rule rule2:
output: 156118490.fa
--2015-07-29 17:25:03-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=156118490&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `156118490.fa'
[ <=> ] 367 --.-K/s in 0s
2015-07-29 17:25:05 (31.5 MB/s) - `156118490.fa' saved [367]
6 of 14 steps (43%) done
rule rule10:
output: 269857713.fa
--2015-07-29 17:25:05-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=269857713&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `269857713.fa'
[ <=> ] 398 --.-K/s in 0s
2015-07-29 17:25:05 (21.9 MB/s) - `269857713.fa' saved [398]
7 of 14 steps (50%) done
rule rule6:
output: 254749437.fa
--2015-07-29 17:25:05-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=254749437&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `254749437.fa'
[ <=> ] 397 --.-K/s in 0s
2015-07-29 17:25:05 (22.4 MB/s) - `254749437.fa' saved [397]
8 of 14 steps (57%) done
rule rule7:
output: 269857780.fa
--2015-07-29 17:25:05-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=269857780&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `269857780.fa'
[ <=> ] 401 --.-K/s in 0s
2015-07-29 17:25:05 (22.5 MB/s) - `269857780.fa' saved [401]
9 of 14 steps (64%) done
rule rule8:
output: 14971105.fa
--2015-07-29 17:25:05-- http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=14971105&retmode=text&rettype=fasta
Resolving cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)... 193.52.104.20, 2001:660:7220:386:193:52:104:20
Connecting to cache.ha.univ-nantes.fr (cache.ha.univ-nantes.fr)|193.52.104.20|:3128... connected.
Proxy request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: `14971105.fa'
[ <=> ] 379 --.-K/s in 0s
2015-07-29 17:25:05 (21.0 MB/s) - `14971105.fa' saved [379]
10 of 14 steps (71%) done
rule rule11:
input: 52854274.fa, 156118490.fa, 290782623.fa, 209485592.fa, 149126991.fa, 254749437.fa, 269857780.fa, 14971105.fa, 256041807.fa, 269857713.fa
output: all.fa
11 of 14 steps (79%) done
rule rule12:
input: all.fa
output: longest.fa
12 of 14 steps (86%) done
rule rule13:
input: longest.fa
output: __13_phony.flag
__13_phony.flag
13 of 14 steps (93%) done
rule rule14:
input: __13_phony.flag
output: __14_phony.flag
Done
14 of 14 steps (100%) done