-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmetrics.py
58 lines (50 loc) · 1.66 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import numpy as np
from scipy.stats import entropy
from sklearn.neighbors import NearestNeighbors
def entropy_batch_mixing(
    data, labels, n_neighbors=50, n_pools=50, n_samples_per_pool=100
):
    """Compute the Entropy of Batch Mixing (EBM) score for ``data``.

    For every sample, the labels of its ``n_neighbors`` nearest neighbors are
    collected and the Shannon entropy of their label distribution is
    computed, using the number of distinct labels as the logarithm base.
    The per-sample entropies are then averaged, either directly or through
    repeated random pooling.

    Parameters
    ----------
    data
        Numpy ndarray of data, one row per sample.
    labels
        Array-like of labels, one per row of ``data``.
    n_neighbors: int
        Number of nearest neighbors.
    n_pools: int
        Number of EBM computations which will be averaged. When equal to 1,
        the plain mean over all samples is returned and no random sampling
        takes place.
    n_samples_per_pool: int
        Number of samples (drawn with replacement) used in each pool.

    Returns
    -------
    score: float
        EBM score. A float between zero and one. ``0.0`` is returned when
        ``labels`` contains a single distinct value, since no mixing is
        possible in that case.
    """
    labels = np.asarray(labels)
    n_cat = len(np.unique(labels))

    # With one category the entropy base would be 1, and dividing by
    # log(1) == 0 yields NaN. A single batch has zero mixing entropy.
    if n_cat == 1:
        return 0.0

    def _neighborhood_entropy(neighbor_labels):
        # entropy() normalizes the raw counts into probabilities itself.
        _, counts = np.unique(neighbor_labels, return_counts=True)
        return entropy(counts, base=n_cat)

    # Ask for one extra neighbor and drop the first column, which is
    # expected to be the query point itself (NOTE: with exact duplicate rows
    # the dropped column may be a duplicate instead -- confirm if relevant).
    neighbors = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(data)
    indices = neighbors.kneighbors(data, return_distance=False)[:, 1:]

    # Fancy indexing maps the neighbor index matrix to a label matrix.
    batch_indices = labels[indices]
    entropies = np.apply_along_axis(
        _neighborhood_entropy, axis=1, arr=batch_indices
    )

    # average n_pools entropy results where each result is
    # an average of n_samples_per_pool random samples.
    if n_pools == 1:
        return np.mean(entropies)

    pool_means = [
        np.mean(
            entropies[np.random.choice(len(entropies), size=n_samples_per_pool)]
        )
        for _ in range(n_pools)
    ]
    return np.mean(pool_means)