-
Notifications
You must be signed in to change notification settings - Fork 0
/
lazy_greedy.py
108 lines (88 loc) · 2.87 KB
/
lazy_greedy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
print(__doc__)
import matplotlib
#matplotlib.use('TkAgg')
import heapq
import numpy as np
import pandas as pd
import scipy as sp
import math
from scipy import spatial
import matplotlib.pyplot as plt
class FacilityLocation:
def __init__(self, D, V, alpha=1.):
'''
Args
- D: np.array, shape [N, N], similarity matrix
- V: list of int, indices of columns of D
- alpha: float
'''
self.D = D
self.curVal = 0
self.curMax = np.zeros(len(D))
self.gains = []
self.alpha = alpha
self.f_norm = self.alpha / self.f_norm(V)
self.norm = 1. / self.inc(V, [])
def f_norm(self, sset):
return self.D[:, sset].max(axis=1).sum()
def inc(self, sset, ndx):
if len(sset + [ndx]) > 1:
if not ndx: # normalization
return math.log(1 + self.alpha * 1)
return self.norm * math.log(1 + self.f_norm * np.maximum(self.curMax, self.D[:, ndx]).sum()) - self.curVal
else:
return self.norm * math.log(1 + self.f_norm * self.D[:, ndx].sum()) - self.curVal
def add(self, sset, ndx):
cur_old = self.curVal
if len(sset + [ndx]) > 1:
self.curMax = np.maximum(self.curMax, self.D[:, ndx])
else:
self.curMax = self.D[:, ndx]
self.curVal = self.norm * math.log(1 + self.f_norm * self.curMax.sum())
self.gains.extend([self.curVal - cur_old])
return self.curVal
def _heappush_max(heap, item):
heap.append(item)
heapq._siftdown_max(heap, 0, len(heap)-1)
def _heappop_max(heap):
"""Maxheap version of a heappop."""
lastelt = heap.pop() # raises appropriate IndexError if heap is empty
if heap:
returnitem = heap[0]
heap[0] = lastelt
heapq._siftup_max(heap, 0)
return returnitem
return lastelt
def lazy_greedy_heap(F, V, B):
curVal = 0
sset = []
vals = []
order = []
heapq._heapify_max(order)
[_heappush_max(order, (F.inc(sset, index), index)) for index in V]
while order and len(sset) < B:
el = _heappop_max(order)
improv = F.inc(sset, el[1])
# check for uniques elements
if improv >= 0:
if not order:
curVal = F.add(sset, el[1])
sset.append(el[1])
vals.append(curVal)
else:
top = _heappop_max(order)
if improv >= top[0]:
curVal = F.add(sset, el[1])
sset.append(el[1])
vals.append(curVal)
else:
_heappush_max(order, (improv, el[1]))
_heappush_max(order, top)
return sset, vals
def test():
n = 10
X = np.random.rand(n, n)
D = X * np.transpose(X)
F = FacilityLocation(D, range(0, n))
sset = lazy_greedy(F, range(0, n), 15)
print(sset)