perceptron.py
import numpy as np

from utils.optimizers import Momentum
from utils.activations import sigmoid
from utils.losses import mse


class Perceptron:
    def __init__(self, M: int, lr=0.01, activation=sigmoid):
        """
        Parameters
        ----------
        M : int
            Number of features of each input.
        lr : float
            Learning rate.
        activation : callable
            Activation function; defaults to `sigmoid` and must expose a
            `.derivative` attribute, since `fit` relies on it.
        """
        self.lr = lr
        self.activation = activation
        self.weights = np.zeros(M + 1)  # + 1 for the bias weight
        self.optimizer = Momentum(self.weights.shape)

    def fit(
        self,
        inputs: np.ndarray,
        outputs: np.ndarray,
        epochs=300,
        tolerance=1e-3,
        threshold_predictions=lambda P: P,
    ) -> tuple[list[np.ndarray], list[np.ndarray]]:
        """
        Trains the perceptron to fit the `inputs` to the `outputs`.

        Parameters
        ----------
        inputs : numpy.ndarray
            Input data of shape (N, M).
        outputs : numpy.ndarray
            Target outputs of shape (N,).
        epochs : int
            Maximum number of epochs to train.
        tolerance : float
            Loss value at or below which training stops early.
        threshold_predictions : callable
            A function that thresholds the predictions; the identity by default.

        Returns
        -------
        weight_history : list
            Weights sampled every 5 epochs, plus the final weights.
        predict_history : list
            Predictions for each epoch, plus the final predictions.
        """
        _inputs = np.insert(inputs, 0, 1, axis=1)  # bias term at the start of each input
        weight_history = []
        predict_history = []

        for epoch in range(epochs):
            predictions, H = self.predict(inputs)
            predictions = threshold_predictions(predictions)

            loss = mse(outputs, predictions)
            if loss <= tolerance: break  # early stopping

            # Works for the step activation as well, thanks to how we defined these derivatives
            gradients = mse.derivative(outputs, predictions) * self.activation.derivative(H)
            gradients = gradients.reshape(-1, 1)  # reshape to multiply each row as a scalar w/ inputs

            dw = -1 * self.lr * gradients * _inputs  # per-sample deltas, shape (N, M+1)
            dw = self.optimizer(dw)
            dw = np.sum(dw, axis=0)  # squash per-sample deltas into one update of shape (M+1,)
            self.weights += dw

            if epoch % 5 == 0: weight_history.append(self.weights.copy())
            predict_history.append(predictions.copy())
            if epoch % 10 == 0: print(f"{epoch=} ; {loss=}")

        weight_history.append(self.weights.copy())
        predict_history.append(predictions.copy())
        print(f"{epoch=} ; {loss=}")

        return weight_history, predict_history

    def predict(self, inputs: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """
        Makes a prediction over `inputs`.

        Parameters
        ----------
        inputs : numpy.ndarray
            Input data of shape (N, M).

        Returns
        -------
        predictions : numpy.ndarray
            Predicted outputs of shape (N,).
        H : numpy.ndarray
            Linear combination of the inputs and the weights, of shape (N,).
        """
        _inputs = np.insert(inputs, 0, 1, axis=1)  # bias term at the start of each input
        H = _inputs @ self.weights  # shape (N,)
        return self.activation(H), H
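
Usage sketch (not part of perceptron.py): the snippet below assumes the repo's `utils` package is importable as in the file above, that `sigmoid` and `mse` expose the `.derivative` attributes the training loop relies on, and that targets are passed as 1-D arrays. The OR dataset, the learning rate, and the 0.5 threshold are illustrative choices, not part of the original code.

import numpy as np
from perceptron import Perceptron

# Toy OR problem: 4 samples with 2 features each, 1-D targets (illustrative data).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y = np.array([0, 1, 1, 1], dtype=float)

model = Perceptron(M=2, lr=0.1)  # M = number of features per input

# Threshold the sigmoid outputs at 0.5 so the loss is computed on hard labels
# (assumed usage; by default `fit` keeps the raw activations).
weight_history, predict_history = model.fit(
    X, y, epochs=500,
    threshold_predictions=lambda P: (P >= 0.5).astype(float),
)

final_predictions, _ = model.predict(X)
# OR is linearly separable, so this should recover the labels [0 1 1 1]
# once training converges.
print((final_predictions >= 0.5).astype(int))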