import numpy as np
import sys

from pandas import Series
from scipy.spatial.distance import cdist
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.metrics import roc_curve, roc_auc_score, f1_score


def evaluate(rnn, X_test, Y_test, threshold, first_k=0):
    """Predict with `rnn` on X_test and print AUC, precision@k, precision, recall, and F1."""
    Y_hats = np.zeros(Y_test.shape)

    for i in range(len(X_test)):
        Y_hats[i, :] = rnn.predict(X_test[i])[0]

    # Optionally restrict evaluation to the first `first_k` label columns.
    if first_k > 0:
        yh = Y_hats[:, :first_k]
        yt = Y_test[:, :first_k]
    else:
        yh = Y_hats
        yt = Y_test

    print("calculating F1")
    f1, p, r = F1(yh, yt, threshold)

    print("calculating AUC")
    ROC, AUC, _ = ROC_AUC(yh, yt)

    print("calculating precision at 10:")
    p10, best_p10 = precision_at_k(yh, yt, 10)

    print("calculating precision at 20:")
    p20, best_p20 = precision_at_k(yh, yt, 20)

    print("AUC: %s, P@10: %s, bp10: %s, P@20: %s, bp20: %s, Prec: %s, Rec: %s, F1: %s" %
          (AUC, p10, best_p10, p20, best_p20, p, r, f1))

    return

def compute_micro_evaluations(Ytrue, Ypred, threshold_score='f1', criterion='zack', k=10):
    """Micro-averaged metrics (AUROC, AUPRC, F1, precision, recall, P@k) as a pandas Series."""
    ytrue = Ytrue.flatten()
    ypred = Ypred.flatten()

    # Equivalent curves could also be obtained from sklearn's roc_curve / roc_auc_score.
    roc, auroc, troc = ROC_AUC(ypred, ytrue)
    roc = np.array(roc)        # list of (FPR, TPR) pairs -> (n, 2) array
    troc = np.asarray(troc)    # thresholds as an array so downstream shape checks work

    prc, auprc, tprc = PRC_AUC(ypred, ytrue)
    f1c, _ = f1_curve(ypred, ytrue)

    # Pick a decision threshold by optimizing the requested curve.
    if threshold_score == 'roc':
        threshold, _ = optimize_threshold_with_roc(roc, troc, criterion=criterion)
    elif threshold_score == 'prc':
        threshold, _ = optimize_threshold_with_prc(prc, tprc, criterion=criterion)
    else:
        threshold, _ = optimize_threshold_with_f1(f1c, tprc, criterion=criterion)

    f1, p, r = F1(ypred, ytrue, threshold)

    if len(Ytrue.shape) > 1 and Ytrue.shape[1] > k:
        pk, best_pk = precision_at_k(Ypred, Ytrue, k)
    else:
        pk, best_pk = (np.nan, np.nan)

    return Series([auroc, auprc, f1, p, r, threshold, pk, best_pk],
                  index=['auroc', 'auprc', 'f1', 'precision', 'recall', 'threshold',
                         'precision_at_{0}'.format(k), 'best_precision_at_{0}'.format(k)])

def F1(Y_hats, Y_test, threshold):
    """Precision, recall, and F1 of predictions thresholded at `threshold`."""
    YH = Y_hats > threshold

    tp = YH & (Y_test > 0)               # true positives
    p = tp.sum() * 1.0 / YH.sum()        # precision
    r = tp.sum() * 1.0 / Y_test.sum()    # recall
    return ((2 * p * r) / (p + r)), p, r

def precision_at_k(Y_hats, Y_test, k):
    """Mean precision@k across rows, plus the best precision@k achievable for these labels."""
    rows, cols = Y_hats.shape
    ranks = np.argsort(-1 * Y_hats, axis=1)   # column indices sorted by descending score
    numerator = 0.
    for i in range(rows):
        for j in range(k):
            numerator += Y_test[i, ranks[i, j]]

    pk = numerator * 1.0 / (rows * k)

    # Upper bound: every positive label lands inside some row's top k.
    best_pk = Y_test.sum() * 1.0 / (rows * k)

    return pk, best_pk

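# Worked example for precision_at_k (hypothetical values, illustration only): with
# Y_hats = np.array([[0.9, 0.1, 0.8]]), Y_test = np.array([[1, 0, 0]]) and k=2,
# the ranked columns are [0, 2, 1] and the top-2 columns contain one positive,
# so the function returns (0.5, 0.5): 1 hit out of 2 slots, and a best achievable
# value of 1 positive / 2 slots.
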
def ROC_AUC(Y_hats, Y_test):
    """ROC curve, an AUC estimate, and the score thresholds, computed from scratch."""
    total_positives = Y_test.sum() * 1.0
    total_negatives = len(Y_test.flatten()) * 1.0 - total_positives

    # Sort (score, label) pairs by descending score.
    sorted_pred = sorted(zip(Y_hats.flatten(), Y_test.flatten()), key=lambda x: -1 * x[0])

    tp = 0.0
    fp = 0.0

    ROC = []

    # Sweep the threshold down through the sorted scores, tracing (FPR, TPR) points.
    for yh, gt in sorted_pred:
        if gt == 1.0:
            tp += 1.0
        else:
            fp += 1.0

        ROC += [((fp / total_negatives), (tp / total_positives))]

    # Estimate the area as the mean TPR over all curve points (averaged over rank,
    # not FPR, so it can differ slightly from trapezoidal integration).
    l = len(ROC)
    AUC = 0.0
    for x, y in ROC:
        AUC += y * (1.0 / l)

    thresholds = [yh for yh, gt in sorted_pred]
    return ROC, AUC, thresholds

def PRC_AUC(Y_hats, Y_test):
    """Precision-recall curve (as recall, precision pairs), average precision, and thresholds."""
    p, r, thresholds = precision_recall_curve(Y_test.flatten(), Y_hats.flatten())
    # precision_recall_curve returns one more (p, r) point than thresholds; pad to match.
    thresholds = np.hstack([thresholds, thresholds[-1]])
    prc = np.vstack([r, p]).T
    auc = average_precision_score(Y_test.flatten(), Y_hats.flatten(), average='micro')
    return prc, auc, thresholds

def f1_curve(Y_hats, Y_test):
    """F1 score at each threshold of the precision-recall curve."""
    p, r, thresholds = precision_recall_curve(Y_test.flatten(), Y_hats.flatten())
    thresholds = np.hstack([thresholds, thresholds[-1]])
    f1 = (2 * p * r) / (p + r)
    return f1, thresholds

def optimize_threshold_with_roc(roc, thresholds, criterion='dist'):
    """Pick the threshold whose ROC point is closest to (0, 1), or maximizes TPR - FPR ('margin')."""
    if roc.shape[1] > roc.shape[0]:
        roc = roc.T
    assert(roc.shape[0] == thresholds.shape[0])
    if criterion == 'margin':
        scores = roc[:, 1] - roc[:, 0]
    else:
        scores = -cdist(np.array([[0, 1]]), roc)
    ti = np.nanargmax(scores)
    return thresholds[ti], ti

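# Worked example for optimize_threshold_with_roc (hypothetical values, illustration only):
# with roc = np.array([[0.0, 0.0], [0.1, 0.7], [0.4, 0.9], [1.0, 1.0]]) and
# thresholds = np.array([0.9, 0.6, 0.3, 0.1]), the default 'dist' criterion selects the
# point closest to the ideal corner (0, 1) -- here (0.1, 0.7) -- so the function
# returns (0.6, 1).
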
def optimize_threshold_with_prc(prc, thresholds, criterion='min'):
    """Pick the threshold that optimizes a criterion over the precision-recall curve."""
    prc[np.isnan(prc)] = 0
    if prc.shape[1] > prc.shape[0]:
        prc = prc.T
    assert(prc.shape[0] == thresholds.shape[0])
    if criterion == 'sum':
        scores = prc.sum(axis=1)
    elif criterion.startswith('dist'):
        scores = -cdist(np.array([[1, 1]]), prc)   # closest point to perfect (1, 1)
    else:
        scores = prc.min(axis=1)                   # maximize the worse of recall/precision
    ti = np.nanargmax(scores)
    return thresholds[ti], ti

mp = np.finfo(float).eps

def optimize_threshold_with_f1(f1c, thresholds, criterion='max'):
    """Pick the threshold that maximizes F1, or (otherwise) the one closest to 0.5 * F1."""
    if criterion == 'max':
        ti = np.nanargmax(f1c)
    else:
        ti = np.nanargmin(np.abs(thresholds - 0.5 * f1c))
        # Alternative: restrict to thresholds within machine epsilon of 0.5 * f1c
        # and take the best F1 among them:
        #   idx = (thresholds >= f1c * 0.5 - mp) & (thresholds <= f1c * 0.5 + mp)
        #   ti = np.where(idx)[0][f1c[idx].argmax()]
    return thresholds[ti], ti

def random_split(n, test_frac=0.1):
    """Randomly split indices 0..n-1 into train and test sets, with ~test_frac in test."""
    all_idx = np.arange(n)
    test_idx = all_idx[np.random.choice(n, int(np.ceil(test_frac * n)), replace=False)]
    train_idx = np.setdiff1d(all_idx, test_idx)
    assert(np.all(np.sort(np.hstack([train_idx, test_idx])) == all_idx))
    return train_idx, test_idx

def generate_one_split(Y, test_frac=0.1, valid_frac=0.1, minpos=10, verbose=0):
    """Draw train/valid/test row splits of Y, retrying until every label column has at
    least `minpos` positives in each partition (2 * minpos outside the test set)."""
    split = None

    if verbose > 0:
        sys.stdout.write('Generating {0} test split'.format(test_frac))
        sys.stdout.flush()
    while split is None:
        if verbose > 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        not_test_idx, test_idx = random_split(Y.shape[0], test_frac=test_frac)
        assert(np.all(np.sort(np.hstack([not_test_idx, test_idx])) == np.arange(Y.shape[0])))
        if np.all(Y[not_test_idx, :].sum(axis=0) >= 2 * minpos) and np.all(Y[test_idx, :].sum(axis=0) >= minpos):
            if verbose > 0:
                sys.stdout.write('Generating {0}/{1} train/valid splits'.format(1 - (test_frac + valid_frac), valid_frac))
                sys.stdout.flush()
            while split is None:
                if verbose > 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()
                train_idx, valid_idx = random_split(Y[not_test_idx].shape[0], test_frac=valid_frac / (1 - test_frac))
                assert(np.all(np.sort(np.hstack((train_idx, valid_idx))) == np.arange(Y[not_test_idx].shape[0])))
                if np.all(Y[not_test_idx, :][train_idx, :].sum(axis=0) >= minpos) and np.all(Y[not_test_idx, :][valid_idx, :].sum(axis=0) >= minpos):
                    split = (np.sort(not_test_idx[train_idx]), np.sort(not_test_idx[valid_idx]), np.sort(test_idx))
                    if verbose > 0:
                        sys.stdout.write('DONE!\n')
                    break

    return split

def generate_splits(Y, num_splits=10, test_frac=0.1, valid_frac=0.1, minpos=10, verbose=0):
    """Generate `num_splits` independent train/valid/test splits of Y."""
    return [generate_one_split(Y, test_frac=test_frac, valid_frac=valid_frac,
                               minpos=minpos, verbose=verbose)
            for i in range(num_splits)]
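
# Minimal usage sketch (illustrative only; the synthetic data and parameter values
# below are assumptions, not part of the original module).
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    Ytrue = (rng.rand(200, 15) < 0.3).astype(float)                   # binary label matrix
    Ypred = np.clip(Ytrue * 0.6 + rng.rand(200, 15) * 0.4, 0.0, 1.0)  # noisy scores

    # Micro-averaged metrics with an F1-maximizing threshold.
    print(compute_micro_evaluations(Ytrue, Ypred, threshold_score='f1', criterion='max', k=10))

    # One label-stratified train/valid/test split; loops until each label has
    # enough positives in every partition.
    train_idx, valid_idx, test_idx = generate_one_split(Ytrue, test_frac=0.1,
                                                        valid_frac=0.1, minpos=2)
    print(len(train_idx), len(valid_idx), len(test_idx))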