-
Notifications
You must be signed in to change notification settings - Fork 0
/
Probs.py
151 lines (140 loc) · 5.33 KB
/
Probs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
This module computes the probabilities and functions defined in the article, for binary trees only.
In order to do that, we need to import these modules.
Variable `a` represents the alpha in Ford's Alpha model.
"""
from __future__ import division
import Shape, PhyloTree, newick
from sage.all import *
from sympy import *
a = var('a')
def Pi(T):
"""
Compute the product in Proposition 2.
:param T: A given `Shape` object (it depends only on the topology of T).
:return: A symbolic expression depending on `a`.
"""
if T.is_leaf or T.children == []:
return 1
elif len(T.children) > 1:
b = T.children[0].count_leaves()
c = T.children[1].count_leaves()
if not b*c == 0:
return phi(b, c)*prod(Pi(x) for x in T.children)
else:
return 1
else:
return 1
def phi(n, m):
"""
Compute phi as defined in the article.
:param n, m: Given integers.
:return: A symbolic expression depending on `a`.
"""
return a/Integer(2)*binomial(n+m, n) + (1-2*a)*binomial(n+m-2, n-1)
def Gamma_alpha(n):
"""
Compute the Gamma-Alpha function of n.
:param n: Given integer.
:return: A symbolic expression depending on `a`.
"""
if n == 2:
return 1 - a
elif n > 2:
return (n - 1 - a)*Gamma_alpha(n-1)
else:
return 0
def prob_shape(T):
"""
Compute the probability of generating the shape of T under the Alpha model by Ford, assuming T is an instance of `Shape` --i.e.,
that it is only the shape of a tree.
:param T: The instance of `Shape` of which we want to compute the probability.
:return: A symbolic expression depending on `a`.
"""
if not bool(T.children):
return 1
T1 = T.shape()
n = T1.count_leaves()
Gamma = Gamma_alpha(n)
if Gamma == 0:
return "ERROR: division by zero"
else:
k = T1.count_symmetries()
eq = 2 ** (n - k - 1) / Gamma * Pi(T)
return eq.factor()
def prob_tree(T):
"""
Compute the probability of generating T under the Alpha model by Ford, assuming T is an instance of `PhyloTree`.
Given an instance of `Shape`, it computes the probability of a phylogenetic tree with its shape.
:param T: The instance of `PhyloTree` of which we want to compute the probability.
:return: A symbolic expression depending on `a`.
"""
if not bool(T.children):
return 1
n = T.count_leaves()
Gamma = Gamma_alpha(n)
if Gamma == 0:
return "ERROR: division by zero"
else:
eq = 2 ** (n - 1) / (factorial(n)*Gamma) * Pi(T)
return eq.factor()
def prob(T):
"""
Compute the probability of generating T under the Alpha model by Ford.
Given an instance of `Shape`, it computes the probability of generating the given shape.
Given an instance of `PhyloTree` that is a phylogenetic tree --i.e., such that does not have repeated labels--, it
computes the probability of generating the given tree.
:param T: The instance of `Shape` of which we want to compute the probability.
:return: A symbolic expression depending on `a`.
"""
if not bool(T.children):
return 1
n = T.count_leaves()
Gamma = Gamma_alpha(n)
if Gamma == 0:
return "ERROR: division by zero"
else:
k = T.count_symmetries()
nlabels = len(T.labels())
eq = 2**(n-k-1) / (factorial(nlabels)*Gamma) * Pi(T)
return eq.factor()
def shape_probs_from_list(X):
"""
Return the probabilities of all the instances of `Shape` in a list (in string) of Newick codes.
:param X: a string containing several Newick codes.
:return: A list of symbolic expressions depending on `a`.
"""
l = Shape.from_newick_list(X)
return [prob_shape(t) for t in l]
def tree_probs_from_list(X):
"""
Return the probabilities of all the instances of `PhyloTree` in a list (in string) of Newick codes.
:param X: a string containing several Newick codes.
:return: A list of symbolic expressions depending on `a`.
"""
l = PhyloTree.from_newick_list(X)
return [prob_tree(t) for t in l]
def shape_probs_from_file(fname, encoding='utf8', strip_comments=False, **kw):
"""
Load a list of instances of `Shape` from a Newick formatted file and return their probabilities in a list, assuming
they are instances of `Shape`.
:param fname: file path.
:param strip_comments: Flag signaling whether to strip comments enclosed in square \
brackets.
:param kw: Keyword arguments are passed through to `Node.read`.
:return: A list of symbolic expressions depending on `a`.
"""
l = newick.read(fname, encoding, strip_comments, **kw)
return [prob_shape(Shape.newick_node_to_shape(t)) for t in l]
def tree_probs_from_file(fname, encoding='utf8', strip_comments=False, **kw):
"""
Load a list of instances of `PhyloTree` from a Newick formatted file and return their probabilities in a list, assuming
they are instances of `PhyloTree`.
:param fname: file path.
:param strip_comments: Flag signaling whether to strip comments enclosed in square \
brackets.
:param kw: Keyword arguments are passed through to `Node.read`.
:return: A list of symbolic expressions depending on `a`.
"""
l = newick.read(fname, encoding, strip_comments, **kw)
return [prob_tree(PhyloTree.newick_node_to_tree(t)) for t in l]