forked from cmudeeplearning11785/hw2p2-fall18
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
101 lines (72 loc) · 3.14 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from __future__ import print_function
import os
import numpy as np
from tqdm import tqdm
from scipy.optimize import brentq
from scipy.interpolate import interp1d
from sklearn.metrics import roc_curve
def train_load(path, parts):
    """
    Load the preprocessed training archives, stack them, and densify speaker IDs.

    Example Usage: train_load("/path/to/training/files/", range(1,4)) loads
    1.preprocessed.npz, 2.preprocessed.npz and 3.preprocessed.npz.

    NOTE: Expects the npz files to have already been preprocessed (i.e., in terms
    of VAD and normalization) using the preprocess.py script.

    Args:
        path: Directory that contains the "<part>.preprocessed.npz" files.
        parts: Iterable of part identifiers; each one is converted with str()
            to build the file name.

    Returns:
        A tuple (features, speakers, nspeakers):
        features  -- concatenation of the 'feats' array of every part,
        speakers  -- concatenation of the 'targets' array of every part, with
                     the IDs re-numbered by densify_speaker_IDs,
        nspeakers -- number of distinct speaker IDs.

    Raises:
        ValueError: from np.concatenate when `parts` is empty.
    """
    features = []
    speakers = []
    for p in tqdm(parts):
        archive_path = os.path.join(path, str(p) + ".preprocessed.npz")
        # np.load returns a lazy NpzFile for .npz archives; it keeps the file
        # open until closed. Read both members inside the with-block so the
        # handle is released before moving on to the next part.
        # NOTE(review): encoding='latin1' only affects pickled (object) arrays.
        # If these archives hold object arrays, recent NumPy releases also
        # require allow_pickle=True -- confirm against the data before enabling
        # it, because unpickling can execute arbitrary code.
        with np.load(archive_path, encoding='latin1') as npz:
            features.append(npz['feats'])
            speakers.append(npz['targets'])
    features = np.concatenate(features)
    speakers = np.concatenate(speakers)
    # Re-numbers `speakers` in place and reports how many distinct IDs it saw.
    nspeakers = densify_speaker_IDs(speakers)
    print("\nLoaded", len(features), "utterances from", nspeakers, "unique speakers.")
    return features, speakers, nspeakers
def dev_load(path):
    """
    Given path to the dev.preprocessed.npz file, loads and returns:
    (1) Dev trials list, where each item is [enrollment_utterance_idx, test_utterance_idx]
    (2) Dev trials labels, where each item is True if same speaker (and False otherwise)
    (3) (Dev) Enrollment array of utterances
    (4) (Dev) Test array of utterances

    Args:
        path: Path to the archive; it must contain the substring '.preprocessed.npz'.

    Returns:
        A tuple (trials, labels, enrol, test) read from the archive members
        'trials', 'labels', 'enrol' and 'test'.

    Raises:
        AssertionError: if `path` does not contain '.preprocessed.npz'.
        KeyError: if one of the expected members is missing from the archive.
    """
    assert '.preprocessed.npz' in path
    # np.load returns a lazy NpzFile that keeps the file open; read every
    # member inside the with-block so the handle is closed on return.
    # NOTE(review): encoding='latin1' only affects pickled (object) arrays. If
    # the archive holds object arrays, recent NumPy releases also require
    # allow_pickle=True -- confirm against the data before enabling it, because
    # unpickling can execute arbitrary code.
    with np.load(path, encoding='latin1') as data:
        enrol, test = data['enrol'], data['test']
        trials, labels = data['trials'], data['labels']
    print("Loaded dev data.")
    return trials, labels, enrol, test
def test_load(path):
"""
Given path to the test.preprocessed.npz file, loads and returns:
(1) Test trials list, where each item is [enrollment_utterance_idx, test_utterance_idx]
(2) (Test) Enrollment array of utterances
(3) (Test) Test array of utterances
"""
assert '.preprocessed.npz' in path
data = np.load(path, encoding='latin1')
enrol, test = data['enrol'], data['test']
print("Loaded test data.")
return data['trials'], enrol, test
def EER(labels, scores):
    """
    Computes EER (and threshold at which EER occurs) given a list of (gold standard) True/False labels
    and the estimated similarity scores by the verification system (larger values indicates more similar)
    Sources: https://yangcha.github.io/EER-ROC/ & https://stackoverflow.com/a/49555212/1493011

    The EER is found as the root in [0, 1] of 1 - x - tpr(x), i.e. the false
    positive rate x at which it equals the false negative rate 1 - tpr(x),
    with tpr(x) linearly interpolated along the ROC curve.

    Args:
        labels: Ground-truth labels; True marks the positive (same-speaker) class.
        scores: Similarity scores, one per label.

    Returns:
        A tuple (eer, thresh): the equal error rate and the decision threshold
        interpolated at that rate (in whatever form interp1d returns it).

    Raises:
        ValueError: from brentq when the function has the same sign at both
            ends of [0, 1], so no root is bracketed.
    """
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=True)
    # Build the ROC interpolant once instead of on every evaluation brentq makes.
    tpr_at = interp1d(fpr, tpr)
    eer = brentq(lambda x: 1. - x - tpr_at(x), 0., 1.)
    # NOTE(review): roc_curve's first threshold is a sentinel above every score
    # (reported as inf by recent scikit-learn releases); if the EER lands on
    # the first ROC segment the interpolated threshold may not be finite --
    # confirm against the installed version.
    thresh = interp1d(fpr, thresholds)(eer)
    return eer, thresh
def densify_speaker_IDs(speakers):
    """
    Re-number an array of integer speaker IDs, in place, onto 0 .. (# of speakers)-1.

    Dense IDs are handed out in order of first appearance: the first distinct
    speaker seen becomes 0, the next new one becomes 1, and so on.

    Args:
        speakers: NumPy array of dtype int32 or int64; it is overwritten.

    Returns:
        The number of distinct speaker IDs found.

    Raises:
        AssertionError: if the array's dtype is neither int32 nor int64.
    """
    assert speakers.dtype in (np.int32, np.int64)
    dense_of = {}
    for position, raw_id in enumerate(speakers):
        # setdefault registers a new speaker with the next free dense ID and
        # returns the existing ID for a speaker that was already seen.
        speakers[position] = dense_of.setdefault(raw_id, len(dense_of))
    return len(dense_of)