-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlreg.py
139 lines (98 loc) · 4.08 KB
/
lreg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
from mnist import MNIST
# Load the raw MNIST splits through the `mnist` package's loader, which is
# pointed at the current directory ('.').
mndata = MNIST('.')
x_train, y_train = mndata.load_training()
x_test, y_test = mndata.load_testing()


def to_np_array(x, shape):
    """Return ``x`` converted to a NumPy array and reshaped to ``shape``."""
    return np.array(x).reshape(shape)


# Inputs become one flattened row of 28 * 28 values per sample; labels become
# a single column with one row per sample.
x_train = to_np_array(x_train, (-1, 28 * 28))
x_test = to_np_array(x_test, (-1, 28 * 28))
y_train = to_np_array(y_train, (-1, 1))
y_test = to_np_array(y_test, (-1, 1))
"""
Convert the Multi-Classification Problem into Binary Classification
Use -1.0 instead of 0.0 as it allows gradient descent (i.e., no zero-gradients)
"""
# Label 1 maps to +1.0 and every other label maps to -1.0.
y_train = np.where(y_train == 1.0, 1.0, -1.0)
y_test = np.where(y_test == 1.0, 1.0, -1.0)
class LinearReg:
    """Linear model ``yh = A @ x (+ B)`` with an element-wise squared-error loss.

    Attributes:
        bias: Whether the additive bias term ``B`` is applied in ``__call__``.
        A: Weight matrix of shape ``(out_dim, in_dim)``, drawn uniformly from
            ``[-0.2, 0.2)``.
        B: Bias column of shape ``(out_dim, 1)`` initialised to ones. It is
            only created when ``bias`` is true.
    """

    def __init__(self, in_dim: int, out_dim: int, bias: bool = False):
        """Create the weights (and the bias column when ``bias`` is true).

        Args:
            in_dim: Number of input features (columns of ``A``).
            out_dim: Number of outputs (rows of ``A``).
            bias: Enable the additive bias term ``B``.
        """
        self.bias = bias
        self.A = npr.uniform(-0.2, 0.2, [out_dim, in_dim])
        if bias:
            self.B = np.ones([out_dim, 1], dtype=np.float64)

    def __call__(self, x, apply_sign=False):
        """Compute the model output for ``x``.

        Args:
            x: Input whose leading matrix dimension is ``in_dim``: a column
                ``(in_dim, 1)``, a batch of columns ``(in_dim, n)``, or a
                plain 1-D feature vector ``(in_dim,)``.
            apply_sign: When true, return ``np.sign`` of the output instead of
                the raw value (an output of exactly 0 stays 0).

        Returns:
            ``A @ x`` plus the bias when enabled; a 1-D input yields a 1-D
            output of length ``out_dim``.
        """
        yh = self.A @ x
        if self.bias:
            # B is a column (out_dim x 1). For a 1-D input the product is 1-D,
            # so drop B's column axis; otherwise the sum would broadcast to
            # (out_dim, out_dim) instead of staying (out_dim,).
            yh = yh + (self.B if yh.ndim > 1 else self.B[:, 0])
        if apply_sign:
            return np.sign(yh)
        return yh

    def loss_fn(self, yh, y):
        """Return the element-wise squared error ``(yh - y) ** 2``.

        No reduction or scaling is applied here; callers sum the values and
        apply the ``1 / (2 * N)`` factor themselves.
        """
        return (yh - y) ** 2.0
def train(
    model: "LinearReg",
    train_data: tuple,
    epochs: int = 2,
    verbose: bool = False,
    train_with_bias: bool = False,
    lrate: float = 9e-9,
):
    """Fit ``model`` in place with full-batch gradient descent.

    The reported loss is ``(1 / (2 * N)) * sum(model.loss_fn(yh, y))``. The
    gradients are those of the squared-error objective
    ``J = (1 / (2 * N)) * sum((yh - y) ** 2)``:
    ``dJ/dA = (yh - y) @ X / N`` and ``dJ/dB = sum_i (yh_i - y_i) / N``.

    Args:
        model: Callable model exposing ``A``, ``bias``, ``loss_fn`` and, when
            the bias is trained, ``B``. It is called once per epoch on all
            samples laid out as ``in_dim x N``.
        train_data: ``(inputs, targets)`` pair. ``inputs`` is ``N x in_dim``;
            ``targets`` holds one row (or one scalar) per sample.
        epochs: Number of full passes over the data.
        verbose: Print the loss after every epoch instead of only the last.
        train_with_bias: Also update ``model.B``; requires ``model.bias``.
        lrate: Gradient-descent step size.

    Returns:
        The same ``model`` object, with updated parameters.

    Raises:
        AssertionError: If ``train_data`` is not a pair, or the bias should be
            trained but the model has no bias enabled.
        ValueError: If ``train_data`` contains no samples.
    """
    assert len(train_data) == 2
    if train_with_bias:
        assert model.bias, 'The bias parameter is not enabled.'

    # X : N x d and Y : d-tilda x N. Converting to float64 once up front keeps
    # every epoch from re-casting integer pixel data inside the matmuls.
    X = np.asarray(train_data[0], dtype=np.float64)
    N = X.shape[0]
    if N == 0:
        raise ValueError("train_data contains no samples.")
    Y = np.asarray(train_data[1], dtype=np.float64).reshape(N, -1).T

    for epoch in range(epochs):
        # forward propagation for every sample at once: yh is d-tilda x N
        yh = model(X.T)
        # Reduce to a plain Python float so reporting works for any d-tilda.
        loss = float(np.sum(model.loss_fn(yh, Y))) / (N * 2.0)

        # compute gradients (all from the pre-update parameters)
        dJdyh = yh - Y  # d-tilda x N
        dJdA = (dJdyh @ X) / N  # d-tilda x d, same shape as A
        if train_with_bias:
            # dyh/dB is 1, so dJ/dB is the residual averaged over the samples;
            # keepdims keeps the d-tilda x 1 shape of B.
            dJdB = dJdyh.sum(axis=1, keepdims=True) / N

        # update A (and B when requested)
        model.A -= lrate * dJdA
        if train_with_bias:
            model.B -= lrate * dJdB

        # The final epoch is always reported, exactly once.
        if verbose or epoch == epochs - 1:
            print("Training iteration:", epoch + 1, "Loss:", loss)
    return model
def evaluate(model: "LinearReg", test_data: tuple):
    """Print and return the loss and sign accuracy of ``model`` on ``test_data``.

    Args:
        model: Callable model exposing ``loss_fn``. It is called once on all
            samples laid out as ``in_dim x N``.
        test_data: ``(inputs, targets)`` pair. ``inputs`` is ``N x in_dim``;
            ``targets`` holds one row (or one scalar) per sample.

    Returns:
        ``(loss, accuracy)`` as Python floats. ``loss`` is
        ``sum(model.loss_fn(yh, y)) / (2 * N)``; ``accuracy`` is the fraction
        of output entries whose ``np.sign`` equals the target.

    Raises:
        AssertionError: If ``test_data`` is not a pair.
        ValueError: If ``test_data`` contains no samples.
    """
    assert len(test_data) == 2

    # X : N x d and Y : d-tilda x N
    X = np.asarray(test_data[0], dtype=np.float64)
    N = X.shape[0]
    if N == 0:
        raise ValueError("test_data contains no samples.")
    Y = np.asarray(test_data[1], dtype=np.float64).reshape(N, -1).T

    # forward propagation for every sample at once: yh is d-tilda x N
    yh = model(X.T)
    # Reduce to a plain Python float so reporting works for any d-tilda.
    loss = float(np.sum(model.loss_fn(yh, Y))) / (N * 2.0)

    # calculate accuracy: an entry counts as correct when its sign matches
    hits = np.sign(yh) == Y
    accuracy = float(np.sum(hits)) / hits.size

    print("Loss:", loss, "Accuracy:", accuracy)
    return loss, accuracy
# Build a single-output model without bias over the 784 flattened input
# features and fit it right away; `train` hands back the same model object.
# don't over train it ---- keep no. of epochs minimal
model = train(
    LinearReg(in_dim=784, out_dim=1, bias=False),
    (x_train, y_train),
    epochs=15,
)
evaluate(model, (x_test, y_test))
# Show the signed prediction for the first test sample (as a column vector)
# next to its true label.
print(
    "Pred:",
    model(x_test[0][..., np.newaxis], apply_sign=True),
    "True:",
    y_test[0],
)