# NeuralNetwork3_scalable.py
import numpy as np
import time
import sys
#import matplotlib.pyplot as pyplot

# Assignment notes:
# - Take in the input; each input node represents one pixel.
# - If you are not getting near-perfect accuracy, you likely have a
#   problem in your code.
# - Bias does not need to be implemented; this version appends a constant
#   bias input to each non-output layer anyway.
# - Randomize the initial weights.
# - Forward-pass a batch, aggregate the errors, and backpropagate once
#   (this version updates the weights once per sample instead).
# - Report the average time and accuracy over a range (well above 90%).
# - Hyperparameters can be changed via the script.
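# Expected invocation (the file names below are illustrative; the script
# only assumes three CSV paths on the command line):
#   python3 NeuralNetwork3_scalable.py train_image.csv train_label.csv test_image.csv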
def initialize_network(sizes, square_root_factor):
    # One weight matrix per layer transition. The "+1" column holds the
    # weights for the bias input appended to the previous layer's output.
    network = {}
    for i in range(len(sizes) - 1):
        network['w' + str(i)] = np.random.randn(sizes[i+1], sizes[i] + 1) \
            * np.sqrt(float(square_root_factor) / sizes[i+1])
    return network
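# For reference, with the layer_sizes defined below this produces
# w0: (100, 785), w1: (75, 101), w2: (50, 76), w3: (10, 51).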
num_inputs = 784
num_outputs = 10
hidden_layer_1_num_nodes = 100
hidden_layer_2_num_nodes = 75
hidden_layer_3_num_nodes = 50
square_root_factor = 4
layer_sizes = [num_inputs, hidden_layer_1_num_nodes, hidden_layer_2_num_nodes, hidden_layer_3_num_nodes, num_outputs]
nn = initialize_network(layer_sizes, square_root_factor)
def sigmoid(x, derivative=False):
    if derivative:
        # d/dx sigmoid(x), computed directly from x.
        return np.exp(-x) / ((np.exp(-x) + 1) ** 2)
    return 1 / (1 + np.exp(-x))
def softmax(x, derivative=False):
    # Shifting by x.max() guards against overflow without changing the result.
    exp_shifted = np.exp(x - x.max())
    s = exp_shifted / np.sum(exp_shifted, axis=0)
    if derivative:
        # Elementwise s * (1 - s): the diagonal of the softmax Jacobian.
        return s * (1 - s)
    return s
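# Sanity check (values rounded): softmax(np.array([1.0, 2.0, 3.0]))
# -> [0.090, 0.245, 0.665], which sums to 1 with the largest weight on
# the largest input.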
biases = [1, 1, 1, 1]  # constant bias input appended to each non-output layer
def forward_feed(inputs, biases):
    nn_state = {}
    for i in range(len(layer_sizes)):
        if i == 0:
            # Input layer: the raw pixel vector plus the appended bias input.
            nn_state['o0'] = np.append(inputs, biases[i])
        elif i < len(layer_sizes) - 1:
            # Hidden layers: weighted sum, sigmoid, then append the bias input.
            nn_state['z' + str(i)] = np.matmul(nn['w' + str(i-1)], nn_state['o' + str(i-1)])
            nn_state['o' + str(i)] = np.append(sigmoid(nn_state['z' + str(i)]), biases[i])
        else:
            # Output layer: weighted sum followed by softmax; no bias appended.
            nn_state['z' + str(i)] = np.matmul(nn['w' + str(i-1)], nn_state['o' + str(i-1)])
            nn_state['o' + str(i)] = softmax(nn_state['z' + str(i)])
    return nn_state
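# For this architecture nn_state holds o0 (785,), o1 (101,), o2 (76,),
# o3 (51,), o4 (10,) along with the pre-activations z1..z4.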
def calculate_gradients(inputs, biases, expected):
    nn_state = forward_feed(inputs, biases)
    for i in reversed(range(len(layer_sizes))):
        if i == len(layer_sizes) - 1:
            # Output layer: with softmax + cross-entropy the error signal
            # reduces to (output - target).
            nn_state['g' + str(i)] = nn_state['o' + str(i)] - expected
            nn_state['D' + str(i-1)] = np.outer(nn_state['g' + str(i)], nn_state['o' + str(i-1)])
        elif i > 0:
            # Hidden layers (including the last one): backpropagate through
            # the weights, excluding the bias column, and apply the sigmoid
            # derivative, since the hidden activations are sigmoids.
            layer_size_curr = layer_sizes[i]
            layer_size_prev = layer_sizes[i+1]
            upstream = nn_state['g' + str(i+1)][0:layer_size_prev]
            nn_state['g' + str(i)] = np.matmul(upstream, nn['w' + str(i)][0:layer_size_prev, 0:layer_size_curr]) * sigmoid(nn_state['z' + str(i)], derivative=True)
            # Error signal for the bias input; it is sliced off again below,
            # so it never feeds into a weight update.
            nn_state['g' + str(i)] = np.append(nn_state['g' + str(i)], np.matmul(upstream, nn['w' + str(i)][0:layer_size_prev, [layer_size_curr]] * sigmoid(nn_state['o' + str(i)][layer_size_curr], derivative=True)))
            nn_state['D' + str(i-1)] = np.outer(nn_state['g' + str(i)][0:layer_size_curr], nn_state['o' + str(i-1)])
    return nn_state
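# Each gradient matrix D<k> matches the shape of w<k> (e.g. D3 and w3 are
# both (10, 51)), so the update in the training loop below is a plain
# elementwise step.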
epochs = 50
learning_rate = 0.2
learning_adjust_epoch = 10
learning_adjust_rate = 0.002
#images = np.genfromtxt("./train_image.csv", delimiter=",")
images = np.genfromtxt(sys.argv[1], delimiter=",")
labels = np.genfromtxt(sys.argv[2], delimiter="\n")
accuracies = []
print("learning_rate", learning_rate, "batch_size", 1, "(SGD)", "layer_sizes",
      layer_sizes, "biases", biases, "learning_adjust_epoch", learning_adjust_epoch,
      "learning_adjust_rate", learning_adjust_rate, "epochs", epochs)
for e in range(epochs):
    # Drop the learning rate partway through training: accuracy tends to
    # pass the target threshold by this epoch and oscillates afterwards if
    # the rate is not reduced.
    if e == learning_adjust_epoch:
        learning_rate = learning_adjust_rate
    print('epoch', e)
    start_time = time.time()
    num_correct = 0
    for i in range(10000):
        # Normalize pixel values into [0, 1].
        sample = (images[i] / 255).astype('float32')
        # One-hot encode the label.
        expected_values = np.zeros(num_outputs)
        expected_values[int(labels[i])] = 1
        nn_state = calculate_gradients(sample, biases, expected_values)
        # Per-sample (stochastic) gradient descent step on every weight matrix.
        for j in range(len(layer_sizes) - 1):
            nn['w' + str(j)] -= learning_rate * nn_state['D' + str(j)]
        if np.argmax(nn_state['o' + str(len(layer_sizes)-1)]) == np.argmax(expected_values):
            num_correct += 1
    accuracy = num_correct / 10000
    accuracies.append(accuracy)
    print('accuracy:', accuracy)
    end_time = time.time()
    print(end_time - start_time, "seconds")
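# `accuracies` keeps one entry per epoch; it can be plotted with the
# matplotlib import commented out at the top if a learning curve is wanted.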
images_test = np.genfromtxt(sys.argv[3], delimiter=",")
predictions = []
for test_input in images_test:
    # Normalize the test data the same way as the training data.
    test_input = (test_input / 255).astype('float32')
    nn_state = forward_feed(test_input, biases)
    predictions.append(np.argmax(nn_state['o' + str(len(layer_sizes)-1)]))
# fmt="%d" writes one integer label per line instead of the savetxt
# default float format.
np.savetxt("test_predictions.csv", predictions, delimiter=",", fmt="%d")