# convolution.py
import numpy as np


class Conv:
    def __init__(self, weights_shape, activation, stride=1, padding=0, batch_norm_momentum=False):
        # weights_shape is (kernel_h, kernel_w, out_channels, in_channels).
        self.weights_shape = weights_shape
        # Fan-in based initialization: variance = 1 / (kernel_h * kernel_w * in_channels).
        var = 1 / (weights_shape[0] * weights_shape[1] * weights_shape[3])
        self.weights = np.random.normal(0, np.sqrt(var), weights_shape)
        self.biases = np.random.randn(1, 1, 1, weights_shape[2])
        self.stride = stride
        self.pad = padding
        self.activation = activation
        self.output = None
        self.input = None
        self.input_shape = None
        self.training = False
        # batch_norm_momentum doubles as the on/off switch: False disables batch
        # normalization, otherwise it is the momentum of the running statistics.
        self.batch_normalize = batch_norm_momentum
        self.batch_norm_cache = None
        self.mean = np.ones(self.biases.shape)
        self.variance = np.ones(self.biases.shape)
        self.gamma = np.ones(self.biases.shape)
        self.update_flag = True
        # First and second moment estimates for the Adam-style updates.
        self.vw = 0
        self.vb = 0
        self.sw = 0
        self.sb = 0
        self.vg = 0
        self.sg = 0

    def forward(self, input_layer, weights=None, stride=None, pad=None, d_conv=False, d_a=False, delta=None):
        # Shared convolution routine. The flags pick the operation:
        #   default       -> normal forward pass with self.weights
        #   d_conv=True   -> convolve input_layer with `weights` (used for the weight gradient)
        #   d_a=True      -> scatter `delta` through the kernel onto the input (input gradient)
        if not d_conv:
            if not d_a:
                self.input = input_layer
                self.input_shape = input_layer.shape
            weights = self.weights
            stride = self.stride
            pad = self.pad
        weights = weights.copy()
        input_layer = input_layer.copy()
        i0, i1, i2, i3 = input_layer.shape
        if pad:
            # Zero-pad the two spatial dimensions.
            temp_layer = np.zeros((i0 + 2 * pad, i1 + 2 * pad, i2, i3))
            temp_layer[pad:pad + i0, pad:pad + i1] = input_layer
            input_layer = temp_layer
            i0, i1, _, _ = input_layer.shape
        w0, w1, w2, w3 = weights.shape
        # Spatial size of the output feature map.
        n00 = (i0 - w0) // stride + 1
        n01 = (i1 - w1) // stride + 1
        # Add singleton axes so input and kernel broadcast against each other.
        if d_conv:
            input_layer = np.expand_dims(input_layer, axis=3)
            weights = np.expand_dims(weights, axis=4)
            out_shape = (n00, n01, i2, w3, i3)
        elif d_a:
            delta = np.expand_dims(delta.copy(), axis=4)
            weights = np.expand_dims(weights, axis=2)
            out_shape = 1, 1  # unused on this path; results accumulate into input_layer
        else:
            input_layer = np.expand_dims(input_layer, axis=3)
            weights = np.expand_dims(weights, axis=2)
            out_shape = (n00, n01, i2, w2, w3)
        _, _, w2, w3, w4 = weights.shape
        out = np.zeros(out_shape)
        # n1/extra1: number of strided kernel offsets along the first spatial axis
        # before copies of the kernel stop overlapping, plus the leftover gap.
        for i in range(1, w0 + 1):
            if i * stride >= w0:
                n1 = i
                extra1 = (i * stride) - w0
                break
            if i + w0 > i0:
                n1 = i
                extra1 = 0
                break
        n2 = n1
        extra2 = extra1
        if w0 != w1:
            # Repeat the search for the second spatial axis when the kernel is not square.
            for i in range(1, w1 + 1):
                if i * stride >= w1:
                    n2 = i
                    extra2 = (i * stride) - w1
                    break
                if i + w1 > i1:
                    n2 = i
                    extra2 = 0
                    break
        if extra1 or extra2:
            # Pad the kernel with zeros so tiled copies line up with the stride.
            d = np.zeros((w0 + extra1, w1 + extra2, w2, w3, w4))
            d[:w0, :w1] = weights
            weights = d
            w0, w1, _, _, _ = weights.shape
        temp1 = i0 // w0 if (i0 % w0) < (w0 - extra1) else i0 // w0 + 1
        temp2 = i1 // w1 if (i1 % w1) < (w1 - extra2) else i1 // w1 + 1
        # Tile the kernel across the whole input so one elementwise multiply
        # evaluates many output positions at once.
        fil = np.tile(weights, (temp1, temp2, 1, 1, 1))

        def func1(x):
            # Index pattern for np.add.reduceat: sums each kernel-sized block along axis 0.
            odd = np.remainder(x, 2).astype(bool)
            even = np.invert(odd)
            x[odd] = w0 - extra1 + (x[odd] // 2) * w0
            x[even] = (x[even] // 2) * w0
            x = x[x < x.size]
            return x

        def func2(x):
            # Same as func1, for axis 1.
            odd = np.remainder(x, 2).astype(bool)
            even = np.invert(odd)
            x[odd] = w1 - extra2 + (x[odd] // 2) * w1
            x[even] = (x[even] // 2) * w1
            x = x[x < x.size]
            return x

        for c1, j in enumerate(range(0, w0, stride)):
            for c2, i in enumerate(range(0, w1, stride)):
                # Trim the tiled kernel where it would run past the input.
                s1 = (j + fil.shape[0]) - i0
                s2 = (i + fil.shape[1]) - i1
                temp3 = fil
                if s1 > 0:
                    if s1 > extra1:
                        temp3 = temp3[:-w0]
                    else:
                        temp3 = temp3[:-s1]
                if s2 > 0:
                    if s2 > extra2:
                        temp3 = temp3[:, :-w1]
                    else:
                        temp3 = temp3[:, :-s2]
                if d_a:
                    # Distribute delta back onto the input positions covered by the kernel.
                    delt = np.repeat(np.repeat(delta[c1::n1, c2::n2], w0, 0), w1, 1)
                    delt = delt[:temp3.shape[0], :temp3.shape[1]] * temp3
                    input_layer[j:j + temp3.shape[0], i:i + temp3.shape[1]] += delt.sum(axis=3)
                else:
                    b = temp3 * input_layer[j:j + temp3.shape[0], i:i + temp3.shape[1]]
                    # reduceat needs integer indices, hence dtype=int.
                    b = np.add.reduceat(b, np.fromfunction(func1, (b.shape[0],), dtype=int).tolist(), axis=0)[::2]
                    b = np.add.reduceat(b, np.fromfunction(func2, (b.shape[1],), dtype=int).tolist(), axis=1)[:, ::2]
                    out[c1::n1, c2::n2] = b
        if d_a:
            return input_layer
        if d_conv:
            return out.sum(axis=2)  # sum over the batch dimension
        out = out.sum(axis=4)  # sum over input channels
        if self.batch_normalize:
            out = self.batch_norm(out)
        self.output = self.activation.forward(out + self.biases)
        return self.output

    def find_gradient(self, delta):
        # Returns the gradients w.r.t. the weights, biases and layer input.
        s1, s2, s3, s4 = delta.shape
        delta_b = np.add.reduce(delta.sum(axis=2, keepdims=True), axis=(0, 1), keepdims=True)
        # Input gradient: propagate delta back through the kernel onto a zero
        # buffer shaped like the (padded) input.
        delta_a = self.forward(np.zeros_like(self.input), d_a=True, delta=delta)
        # Weight gradient: dilate delta to undo the stride, then convolve it with the stored input.
        expanded = np.zeros((s1 * self.stride - (self.stride - 1),
                             s2 * self.stride - (self.stride - 1), s3, s4))
        expanded[::self.stride, ::self.stride] = delta
        temp = self.forward(self.input, expanded, 1, self.pad, True)
        # Strip the padding from the input gradient and crop the weight gradient to the kernel size.
        delta_a = delta_a[self.pad:self.pad + self.input_shape[0],
                          self.pad:self.pad + self.input_shape[1]]
        delta_w = temp[:self.weights_shape[0], :self.weights_shape[1]]
        return delta_w, delta_b, delta_a

    def update(self, delta, learning_rate, mini_size, beta1=0.9, beta2=0.999):
        # Backpropagate through the activation, then apply Adam-style updates
        # (no bias correction) to the layer parameters.
        delta = self.activation.backward(delta)
        if self.batch_normalize:
            delta, delta_g = self.batch_norm_backwards(delta)
            self.vg = beta1 * self.vg + (1 - beta1) * delta_g
            self.sg = beta2 * self.sg + (1 - beta2) * np.square(delta_g)
            self.gamma -= learning_rate * self.vg / np.sqrt(self.sg + 1e-8)
        delta_w, delta_b, delta_z = self.find_gradient(delta)
        if self.update_flag:
            # Average the gradients over the mini-batch before updating.
            delta_w = delta_w / mini_size
            delta_b = delta_b / mini_size
            self.vw = beta1 * self.vw + (1 - beta1) * delta_w
            self.vb = beta1 * self.vb + (1 - beta1) * delta_b
            self.sw = beta2 * self.sw + (1 - beta2) * np.square(delta_w)
            self.sb = beta2 * self.sb + (1 - beta2) * np.square(delta_b)
            self.weights -= learning_rate * self.vw / np.sqrt(self.sw + 1e-8)
            self.biases -= learning_rate * self.vb / np.sqrt(self.sb + 1e-8)
        return delta_z

    def batch_norm(self, inputs):
        if self.training:
            mean = np.mean(inputs, axis=(0, 1, 2), keepdims=True)
            variance = np.var(inputs, axis=(0, 1, 2), keepdims=True)
            # Running statistics for inference; batch_norm_momentum is the momentum.
            self.mean = self.batch_normalize * self.mean + (1 - self.batch_normalize) * mean
            self.variance = self.batch_normalize * self.variance + (1 - self.batch_normalize) * variance
        else:
            mean = self.mean
            variance = self.variance
        x_hat = (inputs - mean) / np.sqrt(variance + 1e-8)
        self.batch_norm_cache = x_hat, variance
        return x_hat * self.gamma

    def batch_norm_backwards(self, delta):
        x_hat, variance = self.batch_norm_cache
        d_xhat = delta * self.gamma
        # Per-channel gradient for gamma (gamma has shape (1, 1, 1, channels)).
        d_gamma = np.sum(delta * x_hat, axis=(0, 1, 2), keepdims=True)
        m = delta.shape[0] * delta.shape[1] * delta.shape[2]
        d_z = (m * d_xhat - np.sum(d_xhat, axis=(0, 1, 2), keepdims=True) - x_hat
               * np.sum(d_xhat * x_hat, axis=(0, 1, 2), keepdims=True)) / (m * np.sqrt(variance + 1e-8))
        return d_z, d_gamma
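

# -----------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original layer). Conv only needs an
# activation object exposing forward(z) / backward(delta); the ReLU below is an
# assumed stand-in for whatever activation class the surrounding project
# provides. Inputs are assumed to be laid out as (height, width, batch,
# channels) and weights_shape as (kernel_h, kernel_w, out_channels,
# in_channels), matching the broadcasting in forward() above.
if __name__ == "__main__":
    class ReLU:
        def forward(self, z):
            self.mask = z > 0
            return z * self.mask

        def backward(self, delta):
            return delta * self.mask

    conv = Conv(weights_shape=(3, 3, 8, 1), activation=ReLU(), stride=1, padding=1)
    x = np.random.randn(28, 28, 4, 1)       # 28x28 inputs, batch of 4, 1 channel
    out = conv.forward(x)                    # -> (28, 28, 4, 8) with padding=1
    delta = np.random.randn(*out.shape)      # stand-in gradient from the next layer
    delta_prev = conv.update(delta, learning_rate=0.01, mini_size=4)
    print(out.shape, delta_prev.shape)       # (28, 28, 4, 8) (28, 28, 4, 1)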