Skip to content

Commit c5b5699

Browse files
committed
Fixing approximate sparse filtrations, diffusion maps, and wasserstein
1 parent 243729b commit c5b5699

5 files changed

+46
-251
lines changed

Approximate Sparse Filtrations.ipynb

+25-9
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@
2727
"metadata": {},
2828
"outputs": [],
2929
"source": [
30-
"from ripser import ripser, plot_dgms\n",
30+
"from ripser import ripser\n",
31+
"from persim import plot_diagrams, wasserstein, wasserstein_matching\n",
3132
"import numpy as np\n",
3233
"import matplotlib.pyplot as plt\n",
3334
"from sklearn.metrics.pairwise import pairwise_distances\n",
3435
"from scipy import sparse\n",
35-
"from DGMTools import *\n",
3636
"import time"
3737
]
3838
},
@@ -227,19 +227,35 @@
227227
"#And plot the persistence diagrams on top of each other\n",
228228
"plt.figure(figsize=(10, 5))\n",
229229
"plt.subplot(121)\n",
230-
"plot_dgms(resultfull['dgms'], show=False)\n",
230+
"plot_diagrams(resultfull['dgms'], show=False)\n",
231231
"plt.title(\"Full Filtration: Elapsed Time %g Seconds\"%timefull)\n",
232232
"plt.subplot(122)\n",
233233
"plt.title(\"Sparse Filtration (%.3g%% Added)\\nElapsed Time %g Seconds\"%(percent_added, timesparse))\n",
234-
"plot_dgms(resultsparse['dgms'], show=False)\n",
235-
"\n",
234+
"plot_diagrams(resultsparse['dgms'], show=False)\n",
235+
"\n"
236+
]
237+
},
238+
{
239+
"cell_type": "markdown",
240+
"metadata": {},
241+
"source": [
242+
"We can also compute the Wasserstein distance between the two diagrams to see how close they are. Try this with different approximations above"
243+
]
244+
},
245+
{
246+
"cell_type": "code",
247+
"execution_count": null,
248+
"metadata": {},
249+
"outputs": [],
250+
"source": [
236251
"#Show the Wasserstein distance for H1\n",
237252
"I1 = resultfull['dgms'][1]\n",
238253
"I2 = resultsparse['dgms'][1]\n",
239-
"(matchidxw, wdist, wD) = getWassersteinDist(I1, I2)\n",
254+
"matchdist, (matchidx, D) = wasserstein(I1, I2, matching=True)\n",
240255
"plt.figure(figsize=(5, 5))\n",
241-
"plotWassersteinMatching(I1, I2, matchidxw, labels = ['exact', '$\\epsilon = %.3g$'%eps])\n",
242-
"plt.title(\"Wasserstein Dist: %.3g\"%wdist)\n"
256+
"#Plot wasserstein matching\n",
257+
"wasserstein_matching(I1, I2, matchidx, labels = ['exact', '$\\epsilon = %.3g$'%eps])\n",
258+
"plt.title(\"Wasserstein Dist: %.3g\"%matchdist)\n"
243259
]
244260
},
245261
{
@@ -266,7 +282,7 @@
266282
"name": "python",
267283
"nbconvert_exporter": "python",
268284
"pygments_lexer": "ipython3",
269-
"version": "3.6.4"
285+
"version": "3.7.1"
270286
}
271287
},
272288
"nbformat": 4,

DGMTools.py

+2-224
Original file line numberDiff line numberDiff line change
@@ -11,181 +11,13 @@
1111
import time #For timing kernel comparison
1212
import sklearn.metrics.pairwise
1313
import scipy.stats
14-
from ripser import ripser, plot_dgms
15-
16-
##############################################################################
17-
########## Plotting Functions ##########
18-
##############################################################################
19-
20-
def plotBottleneckMatching(I1, I2, matchidx, D, labels = ['dgm1', 'dgm2']):
21-
plot_dgms([I1, I2], labels = labels)
22-
cp = np.cos(np.pi/4)
23-
sp = np.sin(np.pi/4)
24-
R = np.array([[cp, -sp], [sp, cp]])
25-
if I1.size == 0:
26-
I1 = np.array([[0, 0]])
27-
if I2.size == 0:
28-
I2 = np.array([[0, 0]])
29-
I1Rot = I1.dot(R)
30-
I2Rot = I2.dot(R)
31-
dists = [D[i, j] for (i, j) in matchidx]
32-
(i, j) = matchidx[np.argmax(dists)]
33-
if i >= I1.shape[0] and j >= I2.shape[0]:
34-
return
35-
if i >= I1.shape[0]:
36-
diagElem = np.array([I2Rot[j, 0], 0])
37-
diagElem = diagElem.dot(R.T)
38-
plt.plot([I2[j, 0], diagElem[0]], [I2[j, 1], diagElem[1]], 'g')
39-
elif j >= I2.shape[0]:
40-
diagElem = np.array([I1Rot[i, 0], 0])
41-
diagElem = diagElem.dot(R.T)
42-
plt.plot([I1[i, 0], diagElem[0]], [I1[i, 1], diagElem[1]], 'g')
43-
else:
44-
plt.plot([I1[i, 0], I2[j, 0]], [I1[i, 1], I2[j, 1]], 'g')
45-
46-
47-
def plotWassersteinMatching(I1, I2, matchidx, labels = ['dgm1', 'dgm2']):
48-
plot_dgms([I1, I2], labels = labels)
49-
cp = np.cos(np.pi/4)
50-
sp = np.sin(np.pi/4)
51-
R = np.array([[cp, -sp], [sp, cp]])
52-
if I1.size == 0:
53-
I1 = np.array([[0, 0]])
54-
if I2.size == 0:
55-
I2 = np.array([[0, 0]])
56-
I1Rot = I1.dot(R)
57-
I2Rot = I2.dot(R)
58-
for index in matchidx:
59-
(i, j) = index
60-
if i >= I1.shape[0] and j >= I2.shape[0]:
61-
continue
62-
if i >= I1.shape[0]:
63-
diagElem = np.array([I2Rot[j, 0], 0])
64-
diagElem = diagElem.dot(R.T)
65-
plt.plot([I2[j, 0], diagElem[0]], [I2[j, 1], diagElem[1]], 'g')
66-
elif j >= I2.shape[0]:
67-
diagElem = np.array([I1Rot[i, 0], 0])
68-
diagElem = diagElem.dot(R.T)
69-
plt.plot([I1[i, 0], diagElem[0]], [I1[i, 1], diagElem[1]], 'g')
70-
else:
71-
plt.plot([I1[i, 0], I2[j, 0]], [I1[i, 1], I2[j, 1]], 'g')
14+
from ripser import ripser
7215

7316

7417
##############################################################################
7518
########## Diagram Comparison Functions ##########
7619
##############################################################################
7720

78-
def getWassersteinDist(S, T):
79-
"""
80-
Perform the Wasserstein distance matching between persistence diagrams.
81-
Assumes first two columns of S and T are the coordinates of the persistence
82-
points, but allows for other coordinate columns (which are ignored in
83-
diagonal matching)
84-
:param S: Mx(>=2) array of birth/death pairs for PD 1
85-
:param T: Nx(>=2) array of birth/death paris for PD 2
86-
:returns (tuples of matched indices, total cost, (N+M)x(N+M) cross-similarity)
87-
"""
88-
import hungarian #Requires having compiled the library
89-
90-
# Step 1: Compute CSM between S and T, including points on diagonal
91-
N = S.shape[0]
92-
M = T.shape[0]
93-
#Handle the cases where there are no points in the diagrams
94-
if N == 0:
95-
S = np.array([[0, 0]])
96-
N = 1
97-
if M == 0:
98-
T = np.array([[0, 0]])
99-
M = 1
100-
DUL = sklearn.metrics.pairwise.pairwise_distances(S, T)
101-
102-
#Put diagonal elements into the matrix
103-
#Rotate the diagrams to make it easy to find the straight line
104-
#distance to the diagonal
105-
cp = np.cos(np.pi/4)
106-
sp = np.sin(np.pi/4)
107-
R = np.array([[cp, -sp], [sp, cp]])
108-
S = S[:, 0:2].dot(R)
109-
T = T[:, 0:2].dot(R)
110-
D = np.zeros((N+M, N+M))
111-
D[0:N, 0:M] = DUL
112-
UR = np.max(D)*np.ones((N, N))
113-
np.fill_diagonal(UR, S[:, 1])
114-
D[0:N, M:M+N] = UR
115-
UL = np.max(D)*np.ones((M, M))
116-
np.fill_diagonal(UL, T[:, 1])
117-
D[N:M+N, 0:M] = UL
118-
D = D.tolist()
119-
120-
# Step 2: Run the hungarian algorithm
121-
matchidx = hungarian.lap(D)[0]
122-
matchidx = [(i, matchidx[i]) for i in range(len(matchidx))]
123-
matchdist = 0
124-
for pair in matchidx:
125-
(i, j) = pair
126-
matchdist += D[i][j]
127-
128-
return (matchidx, matchdist, D)
129-
130-
def getBottleneckDist(S, T):
131-
"""
132-
Perform the Bottleneck distance matching between persistence diagrams.
133-
Assumes first two columns of S and T are the coordinates of the persistence
134-
points, but allows for other coordinate columns (which are ignored in
135-
diagonal matching)
136-
:param S: Mx(>=2) array of birth/death pairs for PD 1
137-
:param T: Nx(>=2) array of birth/death paris for PD 2
138-
:returns (tuples of matched indices, total cost, (N+M)x(N+M) cross-similarity)
139-
"""
140-
from bisect import bisect_left
141-
from hopcroftkarp import HopcroftKarp
142-
143-
N = S.shape[0]
144-
M = T.shape[0]
145-
# Step 1: Compute CSM between S and T, including points on diagonal
146-
# L Infinity distance
147-
Sb, Sd = S[:, 0], S[:, 1]
148-
Tb, Td = T[:, 0], T[:, 1]
149-
D1 = np.abs(Sb[:, None] - Tb[None, :])
150-
D2 = np.abs(Sd[:, None] - Td[None, :])
151-
DUL = np.maximum(D1, D2)
152-
# Put diagonal elements into the matrix, being mindful that Linfinity
153-
# balls meet the diagonal line at a diamond vertex
154-
D = np.zeros((N+M, N+M))
155-
D[0:N, 0:M] = DUL
156-
UR = np.max(D)*np.ones((N, N))
157-
np.fill_diagonal(UR, 0.5*(S[:, 1]-S[:, 0]))
158-
D[0:N, M::] = UR
159-
UL = np.max(D)*np.ones((M, M))
160-
np.fill_diagonal(UL, 0.5*(T[:, 1]-T[:, 0]))
161-
D[N::, 0:M] = UL
162-
163-
# Step 2: Perform a binary search + Hopcroft Karp to find the
164-
# bottleneck distance
165-
N = D.shape[0]
166-
ds = np.unique(D.flatten())
167-
ds = ds[ds > 0]
168-
ds = np.sort(ds)
169-
bdist = ds[-1]
170-
matching = {}
171-
while len(ds) >= 1:
172-
idx = 0
173-
if len(ds) > 1:
174-
idx = bisect_left(range(ds.size), int(ds.size/2))
175-
d = ds[idx]
176-
graph = {}
177-
for i in range(N):
178-
graph['%s'%i] = {j for j in range(N) if D[i, j] <= d}
179-
res = HopcroftKarp(graph).maximum_matching()
180-
if len(res) == 2*N and d < bdist:
181-
bdist = d
182-
matching = res
183-
ds = ds[0:idx]
184-
else:
185-
ds = ds[idx+1::]
186-
matchidx = [(i, matching['%i'%i]) for i in range(N)]
187-
return (matchidx, bdist, D)
188-
18921

19022
def sortAndGrab(dgm, NBars = 10, BirthTimes = False):
19123
"""
@@ -301,58 +133,4 @@ def getPersistenceImage(dgm, plims, res, weightfn = lambda b, l: l, psigma = Non
301133
X = ycdf[:, None]*xcdf[None, :]
302134
#Integral image
303135
PI += weightfn(x, y)*(X[1::, 1::] - X[0:-1, 1::] - X[1::, 0:-1] + X[0:-1, 0:-1])
304-
return {'PI':PI, 'xr':xr[0:-1], 'yr':yr[0:-1]}
305-
306-
def testBottleneckWassersteinNoisyCircle():
307-
N = 400
308-
np.random.seed(N)
309-
t = np.linspace(0, 2*np.pi, N+1)[0:N]
310-
X = np.zeros((N, 2))
311-
X[:, 0] = np.cos(t)
312-
X[:, 1] = np.sin(t)
313-
I1 = ripser(X)['dgms'][1]
314-
X2 = X + 0.1*np.random.randn(N, 2)
315-
I2 = ripser(X2)['dgms'][1]
316-
tic = time.time()
317-
(matchidxb, bdist, bD) = getBottleneckDist(I1, I2)
318-
btime = time.time() - tic
319-
tic = time.time()
320-
(matchidxw, wdist, wD) = getWassersteinDist(I1, I2)
321-
wtime = time.time() - tic
322-
print("Elapsed Time Bottleneck: %.3g\nElapsed Time Wasserstein: %.3g"%(btime, wtime))
323-
324-
plt.figure(figsize=(12, 6))
325-
plt.subplot(121)
326-
plotBottleneckMatching(I1, I2, matchidxb, bD)
327-
plt.title("Bottleneck Dist: %.3g"%bdist)
328-
plt.subplot(122)
329-
plotWassersteinMatching(I1, I2, matchidxw)
330-
plt.title("Wasserstein Dist: %.3g"%wdist)
331-
plt.show()
332-
333-
def writePD(I, filename):
334-
fout = open(filename, "w")
335-
for i in range(I.shape[0]):
336-
fout.write("%g %g"%(I[i, 0], I[i, 1]))
337-
if i < I.shape[0]-1:
338-
fout.write("\n")
339-
fout.close()
340-
341-
def testBottleneckVsHera(NTrials = 10):
342-
import subprocess
343-
for trial in range(NTrials):
344-
x = np.random.randn(100, 2)
345-
y = np.random.randn(100, 2)
346-
I1 = ripser(x)['dgms'][1]
347-
I2 = ripser(y)['dgms'][1]
348-
(matchidxb, bdist, D) = getBottleneckDist(I1, I2)
349-
print(bdist)
350-
writePD(I1, "PD1.txt")
351-
writePD(I2, "PD2.txt")
352-
subprocess.call(["./bottleneck_dist", "PD1.txt", "PD2.txt"])
353-
print("\n")
354-
355-
356-
if __name__ == '__main__':
357-
#testBottleneckVsHera()
358-
testBottleneckWassersteinNoisyCircle()
136+
return {'PI':PI, 'xr':xr[0:-1], 'yr':yr[0:-1]}

DiffusionMapsAndTDA.ipynb

+5-5
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
"import matplotlib.pyplot as plt\n",
2121
"from mpl_toolkits.mplot3d import Axes3D\n",
2222
"from matplotlib.offsetbox import OffsetImage, AnnotationBbox\n",
23-
"from ripser import ripser, plot_dgms\n",
24-
"from GeomUtils import getGreedyPerm\n",
23+
"from ripser import ripser\n",
24+
"from persim import plot_diagrams\n",
2525
"from DiffusionMaps import *"
2626
]
2727
},
@@ -83,7 +83,7 @@
8383
"plt.imshow(SSMOrig, interpolation = 'nearest', cmap = 'afmhot')\n",
8484
"plt.title(\"Distance Matrix\")\n",
8585
"plt.subplot(233)\n",
86-
"plot_dgms(dgms1)\n",
86+
"plot_diagrams(dgms1)\n",
8787
"plt.subplot(234)\n",
8888
"plt.scatter(X[:, 0], X[:, 1], 40, np.arange(N), cmap = 'Spectral', edgecolor = 'none')\n",
8989
"plt.title(\"Diffusion Maps 2D Projection\")\n",
@@ -92,7 +92,7 @@
9292
"plt.imshow(SSM, interpolation = 'nearest', cmap = 'afmhot')\n",
9393
"plt.title(\"Distance Matrix\")\n",
9494
"plt.subplot(236)\n",
95-
"plot_dgms(dgms2)\n",
95+
"plot_diagrams(dgms2)\n",
9696
"\n",
9797
"plt.show()"
9898
]
@@ -128,7 +128,7 @@
128128
"name": "python",
129129
"nbconvert_exporter": "python",
130130
"pygments_lexer": "ipython3",
131-
"version": "3.6.4"
131+
"version": "3.7.1"
132132
}
133133
},
134134
"nbformat": 4,

Image Patches.ipynb

+1-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
"from mpl_toolkits.mplot3d import Axes3D\n",
2424
"from matplotlib.offsetbox import OffsetImage, AnnotationBbox\n",
2525
"from ripser import ripser, plot_dgms\n",
26-
"from GeomUtils import getGreedyPerm\n",
2726
"import sys\n",
2827
"sys.path.append(\"DREiMac\")\n",
2928
"from ProjectiveCoordinates import ProjCoords, getStereoProjCodim1\n",
@@ -433,7 +432,7 @@
433432
"name": "python",
434433
"nbconvert_exporter": "python",
435434
"pygments_lexer": "ipython3",
436-
"version": "3.6.4"
435+
"version": "3.7.1"
437436
}
438437
},
439438
"nbformat": 4,

0 commit comments

Comments
 (0)