-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinear_regression.py
94 lines (74 loc) · 2.67 KB
/
linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# encoding:utf-8
import numpy as np
class LinearRegression(object):
    """Linear regression trained with mini-batch gradient descent.

    The bias is learned as the weight of an extra constant feature (value 1)
    appended as the last column of the design matrix, so ``W`` has shape
    ``(1, n_features + 1)`` after fitting.

    The optional L2 penalty (``reg_str``) is applied to every entry of ``W``,
    including the bias weight.
    """

    def __init__(self, reg_str=0.0):
        """Create an unfitted model.

        Args:
            reg_str: L2 regularization strength (``lam`` in the formulas).
        """
        self.W = None  # weights incl. bias, shape (1, n_features + 1); set by fit()
        self.error = None  # last loss evaluated on the full training set in fit()
        self.lam = reg_str  # regularization strength

    def _linear_hypothesis(self, W, X):
        """Return predictions ``h = X . W^T`` as a column of shape (m, 1).

        ``X`` must already contain the bias column (see ``_concat_bias``).
        """
        return np.dot(X, W.T)

    def _cal_gradient(self, W, X, y):
        """Return the loss gradient with respect to ``W``, shape (1, d).

        g = (sum_i[(h_i - y_i) * x_i] + lam * W) / m
        """
        residual = self._linear_hypothesis(W, X) - y
        return (np.dot(residual.T, X) + self.lam * W) / X.shape[0]

    def _cal_error(self, W, X, y):
        """Return the scalar regularized squared-error loss.

        loss = (sum_i[(h_i - y_i)^2] + lam * sum(W^2)) / (2 * m)
        """
        residual = self._linear_hypothesis(W, X) - y
        penalty = np.sum(self.lam * W ** 2)
        return (np.sum(residual ** 2) + penalty) / (2 * X.shape[0])

    def _concat_bias(self, X):
        """Append a column of ones to ``X`` (the constant bias feature)."""
        bias = np.ones([X.shape[0], 1])
        return np.concatenate([X, bias], axis=1)

    def fit(self, X, y, epoch=100, learning_rate=0.1, tolerance=1e-4):
        """Fit the weights with mini-batch gradient descent.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: array-like with n_samples target values.
            epoch: maximum number of passes over the training data.
            learning_rate: step size of each gradient update.
            tolerance: training stops as soon as the full-data loss drops
                below this value.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if ``X`` is not 2-D or ``y`` does not hold exactly
                one target per sample.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                'X must be 2-D (n_samples, n_features), got %d-D' % X.ndim)
        n_samples, n_features = X.shape

        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if y.shape[0] != n_samples:
            raise ValueError(
                'X has %d samples but y has %d' % (n_samples, y.shape[0]))

        # consider x_0 = 1 which is the feature val of the bias
        X = self._concat_bias(X)

        # init weights
        self.W = np.zeros([1, n_features + 1])

        # Mini-batch size: a tenth of the data, capped at 1000; the whole set
        # when there are fewer than 10 samples. Floor division keeps it an
        # int -- true division would yield a float and break the slicing
        # below. max(1, ...) only guards range() against a zero step when
        # the training set is empty.
        batch_size = max(1, min(1000, n_samples // 10 or n_samples))

        for _ in range(epoch):
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                g = self._cal_gradient(self.W, X[start:stop], y[start:stop])
                self.W -= learning_rate * g

                # The stopping criterion is evaluated on the full training
                # set after every update, not just on the current batch.
                self.error = self._cal_error(self.W, X, y)
                if self.error < tolerance:
                    return self
        return self

    def predict(self, X_test):
        """Predict targets for ``X_test``.

        ``fit`` must have been called first so that ``self.W`` is set.

        Args:
            X_test: 2-D array-like of shape (n_samples, n_features).

        Returns:
            Array of shape (n_samples, 1) with the predicted values.
        """
        X = self._concat_bias(np.asarray(X_test, dtype=float))
        return self._linear_hypothesis(self.W, X)
if __name__ == '__main__':
    # Demo: fit the model on standardized Boston housing data and plot the
    # predicted prices against the real ones for a held-out 30% test split.
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_boston
    import matplotlib.pyplot as plt

    # return_X_y is keyword-only in recent scikit-learn releases, so it must
    # not be passed positionally.
    # NOTE(review): load_boston itself was removed in scikit-learn 1.2; this
    # demo needs an older release or a replacement dataset.
    X, y = load_boston(return_X_y=True)
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    model = LinearRegression()
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    sample_idx = range(0, y_test.shape[0])
    plt.figure()
    plt.plot(sample_idx, y_test, 'g-', label='Real price')
    plt.plot(sample_idx, pred, 'r--', label='Predicted price')
    plt.xlabel('samples')
    plt.ylabel('price')
    plt.title('Boston house-prices')
    plt.legend(loc='upper left')
    plt.show()