-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile.raxml
35 lines (30 loc) · 1.13 KB
/
Snakefile.raxml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from Bio import SeqIO
workingdir="/nfs/pgsb/projects/evograph/analysis/paper_triae1/manuscript_whealbi/after_NatureGen_review/unimputed/iupac"
total=int(44746258) #present in 95% of the subgenome-specific samples on median
GENOMES=["B","A","D"]
rule all:
input:
expand("{genome}/RAxML_bestTree.ASC_GTRGAMMA_felsenstein",genome=GENOMES)
rule RAxML:
input:
"{genome}/genome.fasta",
"{genome}/genome.part",
"{genome}/genome.invariant",
workingdir+"/{genome}"
output:
"{genome}/RAxML_bestTree.ASC_GTRGAMMA_felsenstein"
threads: 12
shell:
"/home/pgsb/daniel.lang/software/standard-RAxML/raxmlHPC-PTHREADS-SSE3 -m ASC_GTRGAMMA --JC69 --asc-corr=felsenstein -s {input[0]} -n ASC_GTRGAMMA_felsenstein -T {threads} -p 27042012 -q {input[1]} -w {input[3]}"
rule make_part:
input:
"{genome}/genome.fasta",
output:
"{genome}/genome.part",
"{genome}/genome.invariant"
run:
record = next(SeqIO.parse(input[0], "fasta"))
with open(output[0], "w") as out:
print("[asc~{p1}], ASC_DNA, p1=1-{seqlen}".format(p1=output[1],seqlen=len(record)), file=out)
with open(output[1], "w") as out:
print(str(total-len(record)) , file=out)