# -*- coding: utf-8 -*-
"""
Created on Tue Apr 6 09:59:14 2021
@author: ll17354
"""
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import LogisticRegression
from sklearn.utils.validation import check_X_y, check_is_fitted
import sys
import numpy as np
from scipy import stats
import statsmodels.api as smf
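

# firth_likelihood() below returns the *negative* of Firth's penalised
# log-likelihood,
#
#     l*(beta) = l(beta) + 0.5 * log det I(beta),
#
# where l(beta) is the ordinary Logit log-likelihood and I(beta) is the
# Fisher information, computed here as the negated Hessian. The Newton
# loops below treat this negated value as a quantity to minimise during
# step-halving.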
def firth_likelihood(beta, logit):
    """Negative Firth-penalised log-likelihood for a statsmodels Logit model."""
    return -(logit.loglike(beta) + 0.5 * np.log(np.linalg.det(-logit.hessian(beta))))
def null_fit_firth(y, X, start_vec=None, step_limit=1000, convergence_limit=0.0001):
    """
    Fits the null model used in the likelihood ratio test.

    Parameters
    ----------
    y : array-like of shape (n_samples,)
        Target vector relative to X. Note this function currently only works
        for binomial regression, so target values of {0, 1} will work while
        {0, 1, 2} will not.
    X : array-like of shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features. Make sure X has an intercept
        term (column of ones).
    start_vec : ndarray or None, optional
        Starting vector. The default is None, in which case the zero vector
        is used.
    step_limit : int, optional
        Maximum number of steps before the fit is terminated. The default is 1000.
    convergence_limit : float, optional
        Minimum difference between successive estimates. The default is 0.0001.

    Returns
    -------
    return_fit : tuple or None
        intercept: intercept coefficient
        beta: list of beta coefficients
        bse: coefficient standard errors
        fitll: fit log-likelihood
        Returns None if the fit fails or does not converge.
    """
    logit_model = smf.Logit(y, X)
    if start_vec is None:
        start_vec = np.zeros(X.shape[1])
    beta_iterations = [start_vec]
    for i in range(step_limit):
        pi = logit_model.predict(beta_iterations[i])
        W = np.diagflat(np.multiply(pi, 1 - pi))
        var_covar_mat = np.linalg.pinv(-logit_model.hessian(beta_iterations[i]))
        # build hat matrix
        rootW = np.sqrt(W)
        H = np.dot(np.transpose(X), np.transpose(rootW))
        H = np.matmul(var_covar_mat, H)
        H = np.matmul(np.dot(rootW, X), H)
        # penalised score
        U = np.matmul(np.transpose(X), y - pi + np.multiply(np.diagonal(H), 0.5 - pi))
        new_beta = beta_iterations[i] + np.matmul(var_covar_mat, U)
        # step halving
        j = 0
        while firth_likelihood(new_beta, logit_model) > firth_likelihood(beta_iterations[i], logit_model):
            new_beta = beta_iterations[i] + 0.5 * (new_beta - beta_iterations[i])
            j = j + 1
            if j > step_limit:
                sys.stderr.write('Firth regression failed. Try increasing step limit.\n')
                return None
        beta_iterations.append(new_beta)
        if i > 0 and (np.linalg.norm(beta_iterations[i] - beta_iterations[i - 1]) < convergence_limit):
            break
    return_fit = None
    if np.linalg.norm(beta_iterations[i] - beta_iterations[i - 1]) >= convergence_limit:
        sys.stderr.write('Firth regression failed to converge.\n')
    else:
        # Calculate stats
        fitll = -firth_likelihood(beta_iterations[-1], logit_model)
        intercept = beta_iterations[-1][0]
        beta = beta_iterations[-1][1:].tolist()
        bse = np.sqrt(np.diagonal(np.linalg.pinv(-logit_model.hessian(beta_iterations[-1]))))
        return_fit = intercept, beta, bse, fitll
    return return_fit
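

# A minimal sketch of calling null_fit_firth() directly on synthetic data
# (the data and variable names here are illustrative assumptions, not part
# of the original API):
#
#     rng = np.random.default_rng(0)
#     x = rng.normal(size=200)
#     X = np.column_stack([np.ones(200), x])         # intercept column of ones
#     y = rng.binomial(1, 1.0 / (1.0 + np.exp(-x)))  # binary outcome
#     result = null_fit_firth(y, X)
#     if result is not None:                         # None signals a failed fit
#         intercept, beta, bse, fitll = result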
class Firth_LogisticRegression(LogisticRegression,
                               ClassifierMixin,
                               BaseEstimator):
    """
    This class re-implements the Firth regression originally written by
    John Lees (https://gist.github.com/johnlees/3e06380965f367e4894ea20fbae2b90d)
    as a class which can interact with the scikit-learn ecosystem.

    To use the fit function, make sure X has an intercept term (column of
    ones). When using validation functions, make sure not to include this
    'dummy' column of ones.

    Please note: this estimator class does not currently pass the
    check_estimator test in sklearn, because it cannot perform the
    multinomial classification task that check_estimator attempts to pass it.

    A minimal usage sketch appears at the bottom of this module.

    Parameters
    ----------
    start_vec : ndarray of shape (n_features, 1), optional
        Starting vector. The default is None, in which case the zero vector
        is used.
    step_limit : int
        Maximum number of steps before the fit is terminated.
    convergence_limit : float
        Minimum difference between successive estimates.
    multi_class : str
        The default is 'ovr', to let this class integrate with the
        LogisticRegression parent class and pass its _check_multi_class
        function. A bit hacky, but it works.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        A list of class labels known to the classifier.
    coef_ : ndarray of shape (1, n_features)
        Coefficients of the features in the decision function, excluding the
        intercept. Always of shape (1, n_features) since this estimator only
        supports binary problems; here n_features counts the real features,
        not the added column of ones.
    beta_ : list
        Coefficients including the intercept; used by the Wald and likelihood
        ratio test functions.
    intercept_ : float
        Intercept (a.k.a. bias) added to the decision function.
    """
    def __init__(self, start_vec=None,
                 step_limit=1000,
                 convergence_limit=0.0001,
                 multi_class='ovr'):
        self.start_vec = start_vec
        self.step_limit = step_limit
        self.convergence_limit = convergence_limit
        self.multi_class = multi_class  # multi_class should not be changed from 'ovr'
    def fit(self, X, y):
        """
        Fits the model according to the given training data. This fit
        function has been changed to work in accordance with the sklearn
        estimator documentation. The major change is that, rather than
        returning specific variables, fit() returns an instance of itself,
        allowing other functions to be run from it.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. Make sure X has an
            intercept term (column of ones).
        y : array-like of shape (n_samples,)
            Target vector relative to X. Note this function currently only
            works for binomial regression, so target values of {0, 1} will
            work while {0, 1, 2} will not.

        Returns
        -------
        self
            Fitted estimator, with attributes:
            self.fitll_ : fit log-likelihood
            self.intercept_ : intercept
            self.coef_ : coefficients not including the intercept (used by
                other sklearn classes)
            self.beta_ : coefficients including the intercept (used by the
                Wald and LR tests)
            self.bse_ : coefficient standard errors
        """
        X, y = check_X_y(X, y)
        self.n_features_in_ = X.shape[1] - 1
        self.classes_ = np.unique(y)
        logit_model = smf.Logit(y, X)
        if self.start_vec is None:
            start_vec = np.zeros(X.shape[1])
        else:
            start_vec = self.start_vec
        beta_iterations = [start_vec]
        for i in range(self.step_limit):
            pi = logit_model.predict(beta_iterations[i])
            W = np.diagflat(np.multiply(pi, 1 - pi))
            var_covar_mat = np.linalg.pinv(-logit_model.hessian(beta_iterations[i]))
            # build hat matrix
            rootW = np.sqrt(W)
            H = np.dot(np.transpose(X), np.transpose(rootW))
            H = np.matmul(var_covar_mat, H)
            H = np.matmul(np.dot(rootW, X), H)
            # penalised score
            U = np.matmul(np.transpose(X), y - pi + np.multiply(np.diagonal(H), 0.5 - pi))
            new_beta = beta_iterations[i] + np.matmul(var_covar_mat, U)
            # step halving
            j = 0
            while firth_likelihood(new_beta, logit_model) > firth_likelihood(beta_iterations[i], logit_model):
                new_beta = beta_iterations[i] + 0.5 * (new_beta - beta_iterations[i])
                j = j + 1
                if j > self.step_limit:
                    sys.stderr.write('Firth regression failed. Try increasing step limit.\n')
                    return None
            beta_iterations.append(new_beta)
            if i > 0 and (np.linalg.norm(beta_iterations[i] - beta_iterations[i - 1]) < self.convergence_limit):
                break
        if np.linalg.norm(beta_iterations[i] - beta_iterations[i - 1]) >= self.convergence_limit:
            sys.stderr.write('Firth regression failed to converge\n')
        else:
            # Calculate stats
            self.fitll_ = -firth_likelihood(beta_iterations[-1], logit_model)
            self.intercept_ = beta_iterations[-1][0]
            self.coef_ = np.array(beta_iterations[-1][1:].tolist()).reshape((1, self.n_features_in_))  # for other sklearn functions
            self.beta_ = [self.intercept_] + beta_iterations[-1][1:].tolist()  # used by the Wald and LR tests
            self.bse_ = np.sqrt(np.diagonal(np.linalg.pinv(-logit_model.hessian(beta_iterations[-1]))))
        return self
    def test_wald(self):
        """
        Implementation of the Wald test.

        Returns
        -------
        waldp : list
            A list of p-values from the Wald test, one per coefficient
            (including the intercept).
        """
        check_is_fitted(self)
        waldp = []
        for beta_val, bse_val in zip(self.beta_, self.bse_):
            waldp.append(2 * (1 - stats.norm.cdf(abs(beta_val / bse_val))))
        return waldp
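
    # Note: 2 * (1 - stats.norm.cdf(abs(z))) above is the standard two-sided
    # p-value for the Wald statistic z = beta / se; it is numerically
    # equivalent to 2 * stats.norm.sf(abs(z)).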
    def test_likelihoodratio(self, X, y, start_vec=None, step_limit=1000, convergence_limit=0.0001):
        """
        Implementation of the likelihood ratio test. An external function,
        null_fit_firth(), is used to refit the null estimator with each
        column of X dropped in turn.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features. Make sure to include the
            dummy column of ones.
        y : array-like of shape (n_samples,)
            Target vector relative to X.

        Returns
        -------
        lrtp : list
            List of p-values from the likelihood ratio test.
        """
        check_is_fitted(self)
        X_np = np.asarray(X)  # works for both DataFrames and ndarrays
        lrtp = []
        for beta_idx, (beta_val, bse_val) in enumerate(zip(self.beta_, self.bse_)):
            null_X = np.delete(X_np, beta_idx, axis=1)
            (null_intercept, null_beta, null_bse, null_fitll) = null_fit_firth(y, null_X, start_vec, step_limit, convergence_limit)
            lrstat = -2 * (null_fitll - self.fitll_)
            lrt_pvalue = 1  # lrstat <= 0 indicates non-convergence
            if lrstat > 0:
                lrt_pvalue = stats.chi2.sf(lrstat, 1)
            lrtp.append(lrt_pvalue)
        return lrtp
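

if __name__ == '__main__':
    # A minimal, self-contained usage sketch (referenced from the class
    # docstring). The synthetic data, seed, and effect sizes below are
    # illustrative assumptions, not part of the original module.
    rng = np.random.default_rng(0)
    n_samples = 200
    x1 = rng.normal(size=n_samples)
    x2 = rng.normal(size=n_samples)
    logits = 0.5 + 1.0 * x1 - 0.5 * x2
    y_demo = rng.binomial(1, 1.0 / (1.0 + np.exp(-logits)))
    # fit() expects an explicit intercept term (column of ones) in X.
    X_demo = np.column_stack([np.ones(n_samples), x1, x2])

    model = Firth_LogisticRegression().fit(X_demo, y_demo)
    print('Penalised log-likelihood:', model.fitll_)
    print('Intercept and coefficients:', model.beta_)
    print('Wald p-values:', model.test_wald())
    print('LR test p-values:', model.test_likelihoodratio(X_demo, y_demo))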