-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlogistic_regression.py
95 lines (68 loc) · 2.98 KB
/
logistic_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
from dataclasses import dataclass
@dataclass
class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    ``fit`` stores the learned parameters on the instance as ``weights`` and
    ``bias``; ``predict`` reads them, so ``fit`` must be called first.
    """

    # Number of gradient-descent iterations performed by ``fit``.
    epochs: int
    # Step size applied to every gradient update.
    learning_rate: float
    # Probability cut-off: ``predict`` emits 0 below it and 1 otherwise.
    threshold: float
    # When true, ``fit`` prints the mean log-loss once per epoch.
    logging: bool

    def sigmoid(self, predictions: np.ndarray) -> np.ndarray:
        """Return the element-wise logistic sigmoid of ``predictions``.

        The numerically stable formulation is used: only ``exp(-|x|)`` is
        ever evaluated, so the exponential cannot overflow for large
        magnitudes of either sign.

        Args:
            predictions: Raw scores (logits). Non-floating input (e.g. an
                integer array) is promoted to ``float64`` so the result is
                not truncated to integers; floating input keeps its dtype.

        Returns:
            Values in ``[0, 1]`` with the same shape as ``predictions``.
        """
        logits = np.asarray(predictions)
        if not np.issubdtype(logits.dtype, np.floating):
            logits = logits.astype(np.float64)

        # exp(-|x|) equals exp(x) for x < 0 and exp(-x) for x >= 0, and is
        # always <= 1.
        decay = np.exp(-np.abs(logits))
        # x < 0:  exp(x) / (1 + exp(x));   x >= 0:  1 / (1 + exp(-x)).
        numerator = np.where(logits < 0, decay, logits.dtype.type(1))
        return numerator / (1 + decay)

    def mean_log_loss(self, predictions: np.ndarray, labels: np.ndarray) -> np.floating:
        """Compute the mean binary cross-entropy (log-loss).

        Args:
            predictions: Predicted probabilities of the positive class.
            labels: Ground-truth labels (0 or 1), broadcastable against
                ``predictions``.

        Returns:
            The mean of ``-(y * log(p) + (1 - y) * log(1 - p))``.
        """
        probabilities = np.asarray(predictions)
        if not np.issubdtype(probabilities.dtype, np.floating):
            probabilities = probabilities.astype(np.float64)
        labels = np.asarray(labels)

        # A saturated sigmoid yields exactly 0.0 or 1.0; log(0) is -inf and
        # 0 * -inf is NaN, so keep probabilities strictly inside (0, 1).
        eps = np.finfo(probabilities.dtype).eps
        probabilities = np.clip(probabilities, eps, 1 - eps)

        positive_term = labels * np.log(probabilities)
        negative_term = (1 - labels) * np.log(1 - probabilities)
        return -(positive_term + negative_term).mean()

    def fit(self, features: np.ndarray, labels: np.ndarray) -> None:
        """Fit the model with full-batch gradient descent.

        Weights start at zero and are updated ``epochs`` times using the
        gradient of the mean log-loss. The result is stored in
        ``self.weights`` and ``self.bias``.

        Args:
            features: 2-D array of shape ``(num_samples, num_features)``.
            labels: 1-D array of ``num_samples`` binary labels.
        """
        num_samples, num_features = features.shape
        self.weights, self.bias = np.zeros(num_features), 0.0

        for epoch in range(self.epochs):
            prediction = self.sigmoid(features.dot(self.weights) + self.bias)
            # (p - y) is the derivative of the log-loss w.r.t. the logit.
            difference = prediction - labels

            d_weights = features.T.dot(difference) / num_samples
            d_bias = difference.sum() / num_samples

            self.weights -= self.learning_rate * d_weights
            self.bias -= self.learning_rate * d_bias

            if self.logging:
                # The loss shown is for the predictions made before this
                # epoch's parameter update.
                print(f"Mean Log-loss [{epoch}]: {self.mean_log_loss(prediction, labels):.3f}")

    def predict(self, features: np.ndarray) -> np.ndarray:
        """Predict class labels for ``features``.

        Args:
            features: Array whose last axis has ``num_features`` entries.

        Returns:
            An array of 0/1 labels: 0 where the predicted probability is
            below ``threshold``, 1 otherwise.
        """
        probabilities = self.sigmoid(features.dot(self.weights) + self.bias)
        return np.where(probabilities < self.threshold, 0, 1)
if __name__ == "__main__":
    # Demo: train the classifier on scikit-learn's breast-cancer dataset and
    # report macro-averaged metrics on a held-out split.
    # Nothing is plotted here, so matplotlib is intentionally not imported.
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support
    from sklearn.model_selection import train_test_split

    # Prepare the data
    data = load_breast_cancer()

    # Train/test split (fixed seed so the reported numbers are reproducible)
    train_features, test_features, train_labels, test_labels = train_test_split(
        data.data,
        data.target,
        test_size=0.33,
        random_state=0,
    )

    logistic_regression = LogisticRegression(
        learning_rate=2e-5,
        epochs=256,
        threshold=0.5,
        logging=False,
    )
    logistic_regression.fit(train_features, train_labels)

    # Evaluate on the held-out samples
    predictions = logistic_regression.predict(test_features)
    accuracy = accuracy_score(test_labels, predictions)
    precision, recall, fscore, _ = precision_recall_fscore_support(
        test_labels, predictions, average="macro"
    )

    print(f"Accuracy: {accuracy:.3f}")
    print(f"Precision: {precision:.3f}")
    print(f"Recall: {recall:.3f}")
    print(f"F-score: {fscore:.3f}")