neural_network.py
import math
import random
import numpy as np
import xml.etree.ElementTree as ET
__biases_for_layers: list
__weights_for_layers: list
__layers_before_activation: list
__layers_sizes: list
__learning_rate: float
__activation_function: str


def initialize(*layers_sizes: int, activation_function: str = 'ReLU'):
    """
    Set up the neural network.

    This module is used for creating, testing and using a standard (classic) neural network.
    It supports networks with any number of layers and any number of neurons in each layer.
    Training is done using backpropagation.

    :param activation_function:
        Activation function used for the layers. Every layer is activated by it except the last one,
        which is activated using softmax. 'ReLU' and 'sigmoid' are supported.
    :param layers_sizes:
        List of layers; each (int) value represents the number of neurons in the given layer.
        The first value (index = 0) is the input layer, the last (index = len - 1) the output layer.
        The values in between create the hidden layers.
    """
    global __layers_sizes, __layers_before_activation, __weights_for_layers, __biases_for_layers, __activation_function

    if len(layers_sizes) < 2:
        raise Exception("There must be at least 2 layers!")

    activation_function = activation_function.lower()
    if not (activation_function == 'relu' or activation_function == 'sigmoid'):
        raise Exception("Provided activation function is not supported!")

    __layers_sizes = layers_sizes
    __activation_function = activation_function

    # set up layers (fill with zeros)
    __layers_before_activation = [np.zeros((x, 1)) for x in layers_sizes]

    # declare lists of matrices
    __weights_for_layers = []  # for weights
    __biases_for_layers = []   # for biases

    # set up matrices
    for i in range(len(layers_sizes) - 1):
        current_size = layers_sizes[i]   # from layer (index)
        next_size = layers_sizes[i + 1]  # to layer (index + 1)

        # set up weights matrix between from/to layers, small random values in [-0.25, 0.25)
        __weights_for_layers.append((np.random.rand(next_size, current_size) - 0.5) / 2)

        # set up biases matrix for the next layer (input layer has no biases), values in [-0.1, 0.1)
        __biases_for_layers.append((np.random.rand(next_size, 1) - 0.5) / 5)
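

# --- Illustrative usage sketch (not part of the original module) ---
# Assuming 28x28 grayscale images flattened to 784 inputs and 10 output classes,
# a network could be set up like this:
#
#   initialize(784, 128, 10, activation_function='ReLU')
#
# This creates weight matrices of shapes (128, 784) and (10, 128) and bias
# vectors of shapes (128, 1) and (10, 1).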


def load_from_xml_file(file_path: str) -> None:
    """
    Load neural network parameters from an XML file.

    :param file_path: The path to the XML file containing the parameters.
    :return: None
    """
    global __layers_sizes, __layers_before_activation, __weights_for_layers, __biases_for_layers, __activation_function

    tree = ET.parse(file_path)
    root = tree.getroot()

    __layers_sizes = [int(x.text) for x in root.find('layers_sizes').findall('layer_size')]
    __activation_function = root.find('activation_function').text

    # set up layers (fill with zeros)
    __layers_before_activation = [np.zeros((x, 1)) for x in __layers_sizes]

    # declare lists of matrices
    __weights_for_layers = []  # for weights
    __biases_for_layers = []   # for biases

    for weights_matrix_xml in root.find('all_weights').findall('weights_matrix'):
        shape = (int(weights_matrix_xml.attrib['row_amount']), int(weights_matrix_xml.attrib['column_amount']))
        weights_matrix = np.zeros(shape)
        for weight_xml in weights_matrix_xml.findall('weight'):
            row, column = int(weight_xml.attrib['row_index']), int(weight_xml.attrib['column_index'])
            weights_matrix[row][column] = float(weight_xml.text)
        __weights_for_layers.append(weights_matrix)

    for biases_matrix_xml in root.find('all_biases').findall('biases_matrix'):
        shape = (int(biases_matrix_xml.attrib['row_amount']), 1)
        biases_matrix = np.zeros(shape)
        for bias_xml in biases_matrix_xml.findall('bias'):
            row = int(bias_xml.attrib['row_index'])
            biases_matrix[row][0] = float(bias_xml.text)
        __biases_for_layers.append(biases_matrix)

    print("Loaded")


def predict(inputs: list) -> list:
    """
    Give a prediction of the outputs for the given input.

    IMPORTANT: inputs should be normalized (between 0 and 1).
    Note: the network should be trained first.

    :param inputs: Data point to predict results for
    :return: Neural network predictions
    """
    predictions = __feed_forward(inputs)
    raw = predictions.transpose()[0]
    return list(raw)
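

# Illustrative sketch (hypothetical values): after training or loading parameters,
# prediction for a single normalized sample could look like this. 'sample' is assumed
# to hold __layers_sizes[0] floats in [0, 1]; the result is a softmax distribution.
#
#   sample = [0.0] * 784                      # hypothetical input
#   outputs = predict(sample)                 # list of len == __layers_sizes[-1]
#   best_class = outputs.index(max(outputs))  # index of the most probable class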


def train_with_mini_batch_gradient_descent(data: list[tuple[list, list]], learning_rate: float = 0.1,
                                           epoch_amount: int = 40, batch_size: int = 10,
                                           expected_max_error: float = 0.01) -> None:
    """
    Start neural network training. This may take a while.

    IMPORTANT: inputs should be normalized (between 0 and 1).

    :param data:
        List of tuples. Each tuple is a single training point, formatted as follows:
        Index 0: input layer data
        Index 1: expected output layer data
    :param learning_rate:
        Controls the rate of learning. A value below 0 raises an exception.
        It is suggested not to exceed 1.
    :param epoch_amount:
        Controls how many iterations (epochs) of learning are performed.
    :param batch_size:
        The size of a single batch.
    :param expected_max_error:
        While training, the average cross-entropy error of each batch is calculated.
        Learning stops early once a batch error falls below this threshold.
    """
    global __learning_rate

    if learning_rate < 0:
        raise Exception("Learning rate must be a positive value")
    if batch_size > len(data):
        raise Exception('Batch size must not be larger than the data length')

    __learning_rate = learning_rate

    for epoch in range(epoch_amount):
        random.shuffle(data)
        batch_begin_index = 0
        while batch_begin_index < len(data):
            if batch_begin_index + batch_size < len(data):
                batch_samples = data[batch_begin_index:batch_begin_index + batch_size]
            else:
                batch_samples = data[batch_begin_index:]

            input_points = [x[0] for x in batch_samples]
            expected_points = [x[1] for x in batch_samples]

            error = __perform_learning_iteration(input_points, expected_points)

            print(f'Epoch: {epoch + 1}\n'
                  f'Epoch percent finish: {round(100 * batch_begin_index / len(data), 2)}%\n'
                  f'Batch error: {round(error, 4)}\n')

            if error <= expected_max_error:
                return

            batch_begin_index += batch_size
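

# Illustrative sketch of the expected training data layout (hypothetical values),
# assuming the network was initialized with 2 inputs and 2 outputs, e.g. initialize(2, 4, 2).
# Each element of 'data' is a tuple (inputs, one-hot expected outputs), both normalized.
#
#   data = [
#       ([0.1, 0.9], [1.0, 0.0]),  # sample belonging to class 0
#       ([0.8, 0.2], [0.0, 1.0]),  # sample belonging to class 1
#   ]
#   train_with_mini_batch_gradient_descent(data, learning_rate=0.1,
#                                          epoch_amount=40, batch_size=2)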


def save_to_xml_file(file_path: str) -> None:
    """
    Save neural network parameters to an XML file.

    :param file_path: The path to the XML file to save the parameters to.
    :return: None
    """
    xml_root = ET.Element('root')
    xml_layer_sizes = ET.SubElement(xml_root, 'layers_sizes')
    xml_all_weights = ET.SubElement(xml_root, 'all_weights')
    xml_all_biases = ET.SubElement(xml_root, 'all_biases')

    for index in range(len(__layers_sizes)):
        ET.SubElement(xml_layer_sizes, 'layer_size', column_index=f'{index}').text = str(__layers_sizes[index])

    ET.SubElement(xml_root, 'activation_function').text = __activation_function

    for index in range(len(__weights_for_layers)):
        weights = __weights_for_layers[index]
        xml_single_weights_matrix = ET.SubElement(xml_all_weights, 'weights_matrix',
                                                  index=f'{index}',
                                                  row_amount=f'{weights.shape[0]}',
                                                  column_amount=f'{weights.shape[1]}')
        for row in range(weights.shape[0]):
            for column in range(weights.shape[1]):
                ET.SubElement(xml_single_weights_matrix, 'weight',
                              row_index=f'{row}', column_index=f'{column}').text = str(weights[row][column])

    for index in range(len(__biases_for_layers)):
        biases = __biases_for_layers[index]
        xml_single_biases_matrix = ET.SubElement(xml_all_biases, 'biases_matrix',
                                                 index=f'{index}',
                                                 row_amount=f'{biases.shape[0]}')
        for bias_index in range(len(biases)):
            ET.SubElement(xml_single_biases_matrix, 'bias',
                          row_index=f'{bias_index}').text = str(biases[bias_index][0])

    tree = ET.ElementTree(xml_root)
    ET.indent(tree, space="\t", level=0)
    tree.write(file_path, encoding='UTF-8', xml_declaration=True)
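

# Illustrative round-trip sketch: persisting a trained network and restoring it later.
# 'network.xml' is a hypothetical file name.
#
#   save_to_xml_file('network.xml')    # writes layer sizes, activation, weights and biases
#   load_from_xml_file('network.xml')  # restores them into the module-level state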


def __feed_forward(inputs: list) -> np.array:
    """
    Perform feedforward propagation through the neural network.

    :param inputs: The input values to propagate through the network.
    :return: The output values after passing through the network.
    """
    # Check if input array size matches the input layer size
    if np.shape(inputs) != (__layers_sizes[0],) and np.shape(inputs) != (__layers_sizes[0], 1):
        raise Exception(f'Wrong input array size! Should be {(__layers_sizes[0],)} or {(__layers_sizes[0], 1)} '
                        f'and was {np.shape(inputs)}')

    # Assign input layer values
    __layers_before_activation[0] = np.array(inputs).reshape(len(inputs), 1)
    current_layer_value = __layers_before_activation[0]

    # Calculate values across layers, skipping input layer (index 0)
    for index in range(len(__layers_before_activation) - 1):
        # Multiply layer weights with its values
        multiplied_by_weights_layer = np.matmul(__weights_for_layers[index], current_layer_value)

        # Add biases
        layer_with_added_biases = np.add(multiplied_by_weights_layer, __biases_for_layers[index])

        # Apply activation function (softmax for the output layer, the chosen one elsewhere)
        if index == len(__layers_before_activation) - 2:
            activated_layer = __softmax(layer_with_added_biases)
        elif __activation_function == 'sigmoid':
            activated_layer = __sigmoid(layer_with_added_biases)
        elif __activation_function == 'relu':
            activated_layer = __ReLU(layer_with_added_biases)
        else:
            raise Exception('Not supported activation function!')

        # Save the pre-activation values of the next layer (used later by backpropagation)
        __layers_before_activation[index + 1] = layer_with_added_biases
        current_layer_value = activated_layer

    return current_layer_value
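

# For reference, one feed-forward step above computes, per layer index l:
#   z[l+1] = W[l] @ a[l] + b[l]
#   a[l+1] = softmax(z[l+1])         for the output layer
#   a[l+1] = relu/sigmoid(z[l+1])    for hidden layers
# where z (the pre-activation value) is what gets stored in __layers_before_activation[l+1].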


def __backpropagation(expected_results: list, predictions: list):
    """
    Perform backpropagation to compute weight and bias updates based on prediction errors.

    :param expected_results: The expected output results.
    :param predictions: The predicted output results.
    :return: Lists of changes for weights and biases.
    """
    # Check if expected results array size matches the output layer size
    if np.shape(expected_results) != (__layers_sizes[-1],):
        raise Exception(f'Wrong result array size! Should be {(__layers_sizes[-1],)} and was '
                        f'{np.shape(expected_results)}')

    # Prepare expected results as a column vector
    expected_results_transposed = np.array(expected_results).reshape(len(expected_results), 1)

    # Initialize error matrix with output layer error
    errors_matrix = expected_results_transposed - predictions

    # Initialize lists to store changes for weights and biases
    change_for_weights = [np.array([]) for _ in range(len(__weights_for_layers))]
    change_for_biases = [np.array([]) for _ in range(len(__biases_for_layers))]

    # Iterate over each weight / bias matrix in reverse order
    for index in reversed(range(len(__weights_for_layers))):
        # Get the derivative of the activation function for each layer's weighted input
        if index == len(__weights_for_layers) - 1:
            activation_derivative_layer = __softmax_derivative(__layers_before_activation[index + 1])
        elif __activation_function == 'sigmoid':
            activation_derivative_layer = __sigmoid_derivative(__layers_before_activation[index + 1])
        elif __activation_function == 'relu':
            activation_derivative_layer = __ReLU_derivative(__layers_before_activation[index + 1])
        else:
            raise Exception('Not supported activation function!')

        # Calculate the gradient
        gradient_matrix = activation_derivative_layer * errors_matrix * __learning_rate

        # Calculate matrix with delta weights (values to change weights by in the given layer)
        delta_weights_matrix = np.matmul(gradient_matrix, __layers_before_activation[index].transpose())

        # Store the changes for weights and biases
        change_for_weights[index] = delta_weights_matrix
        change_for_biases[index] = gradient_matrix

        # Propagate the error to the previous layer with respect to its weights
        errors_matrix = np.matmul(__weights_for_layers[index].transpose(), errors_matrix)

    return change_for_weights, change_for_biases
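

# For reference, the update computed above for each layer index l is (already scaled by
# the learning rate inside the loop):
#   gradient   = f'(z[l+1]) * error[l+1] * learning_rate
#   delta_W[l] = gradient @ (stored layer values at index l).T
#   delta_b[l] = gradient
#   error[l]   = W[l].T @ error[l+1]
# The deltas are later averaged over the mini-batch and added to the parameters.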


def __perform_learning_iteration(data_samples: list, expected_results: list):
    """
    Perform a single learning iteration (one mini-batch) using the backpropagation algorithm.

    :param data_samples: List of input data samples.
    :param expected_results: List of expected output results.
    :return: Average cross-entropy error across all data samples.
    """
    global __weights_for_layers, __biases_for_layers

    # Initialize lists to store changes for weights and biases for all data samples
    all_change_for_weights = [[] for _ in range(len(__weights_for_layers))]
    all_change_for_biases = [[] for _ in range(len(__biases_for_layers))]

    # Initialize error sum
    error_sum = 0

    # Iterate over each data sample and its expected result
    for data_sample, expected_result in zip(data_samples, expected_results):
        # Perform feedforward propagation to get predictions
        predictions = __feed_forward(data_sample)

        # Perform backpropagation to get changes for weights and biases
        change_for_weights, change_for_biases = __backpropagation(expected_result, predictions)

        # Calculate error and add it to the error sum
        error_sum += __calculate_cross_entropy_cost(expected_result, predictions)

        # Store changes for weights and biases for the current data sample
        for index in range(len(__weights_for_layers)):
            all_change_for_weights[index].append(change_for_weights[index])
            all_change_for_biases[index].append(change_for_biases[index])

    # Update weights and biases based on the average changes across all data samples
    for index in range(len(__weights_for_layers)):
        delta_weights = np.mean(all_change_for_weights[index], axis=0)
        delta_biases = np.mean(all_change_for_biases[index], axis=0)
        __weights_for_layers[index] = __weights_for_layers[index] + delta_weights
        __biases_for_layers[index] = __biases_for_layers[index] + delta_biases

    # Return the average error across all data samples
    return error_sum / len(data_samples)


def __calculate_cross_entropy_cost(expected_values, real_values):
    """
    Calculate the cross-entropy cost between expected and predicted values.

    :param expected_values: The expected (target) values.
    :param real_values: The predicted values (network output).
    :return: The cross-entropy cost.
    """
    val_sum = 0
    for expected, real in zip(expected_values, real_values):
        # 'real' is a single-element row of the prediction column vector; convert it to a plain float
        val_sum += expected * math.log(float(real))
    return -val_sum
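

# Tiny worked example (illustrative): for a one-hot target [0, 1, 0] and prediction
# [0.2, 0.7, 0.1], the cost is -(0*log(0.2) + 1*log(0.7) + 0*log(0.1)) = -log(0.7) ≈ 0.357.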


def __softmax(x):
    """
    Compute the softmax activation function.

    :param x: Input values.
    :return: Softmax output.
    """
    tmp = np.exp(x)
    return tmp / np.sum(tmp)


def __sigmoid(x):
    """
    Compute the sigmoid activation function.

    :param x: Input values.
    :return: Sigmoid output.
    """
    return 1 / (1 + np.exp(-x))


def __ReLU(x):
    """
    Compute the ReLU (Rectified Linear Unit) activation function.

    :param x: Input values.
    :return: ReLU output.
    """
    return x * (x > 0)


def __sigmoid_derivative(x):
    """
    Compute the derivative of the sigmoid activation function.

    :param x: Input values.
    :return: Derivative of sigmoid.
    """
    sig = __sigmoid(x)
    return sig * (1 - sig)


def __softmax_derivative(x):
    """
    Compute the derivative of the softmax activation function.

    :param x: Input values.
    :return: Derivative of softmax.
    """
    tmp = __softmax(x)
    return tmp * (1 - tmp)


def __ReLU_derivative(x):
    """
    Compute the derivative of the ReLU activation function.

    :param x: Input values.
    :return: Derivative of ReLU.
    """
    return 1. * (x >= 0)
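

if __name__ == '__main__':
    # Minimal self-contained demo (illustrative sketch, not part of the original module):
    # a tiny 2-4-2 network trained on two hypothetical, hand-made samples.
    random.seed(0)
    np.random.seed(0)

    initialize(2, 4, 2, activation_function='ReLU')

    demo_data = [
        ([0.1, 0.9], [1.0, 0.0]),  # hypothetical sample of class 0
        ([0.9, 0.1], [0.0, 1.0]),  # hypothetical sample of class 1
    ]
    train_with_mini_batch_gradient_descent(demo_data, learning_rate=0.5,
                                           epoch_amount=50, batch_size=2,
                                           expected_max_error=0.05)

    print(predict([0.1, 0.9]))  # ideally leans towards the first class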