-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_network.py
201 lines (189 loc) · 8.49 KB
/
train_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, './lab/')
from MultilayerPerceptron import MultilayerPerceptron
def read_dataset():
    """
    Reads the cleaned dataset named on the command line and splits it in two.

    The script expects exactly two command-line arguments:
        sys.argv[1]: path of the csv file to load.
        sys.argv[2]: integer between 1 and 99, the percentage of rows
                     (taken from the top of the file) used for training.

    Returns:
        (dataset_train, dataset_test): two pandas DataFrames, the first
        holding the leading rows and the second the remaining ones.

    Exits the process with status 1, after printing an error, when the
    argument count is wrong, the percentage is not an int in 1-99, or the
    csv file cannot be opened or parsed.
    """
    if len(sys.argv) != 3:
        print("\033[1m\033[91mError: read_dataset. train_network.py takes two arguments.\n\033[0m")
        sys.exit(1)
    try:
        data_train = int(sys.argv[2])
    except ValueError:
        print("\033[1m\033[91mError: read_dataset. train_network.py second argument must be int.\n\033[0m")
        sys.exit(1)
    if data_train <= 0 or data_train >= 100:
        print("\033[1m\033[91mError: read_dataset. train_network.py second argument must be between 1-99.\n\033[0m")
        sys.exit(1)
    try:
        # Only the read itself is guarded: missing/unreadable files raise
        # OSError, malformed or empty csv content raises a ValueError subclass.
        dataset = pd.read_csv(sys.argv[1])
    except (OSError, ValueError):
        print("\033[1m\033[91mError: read_dataset. {} can't be read.\n\033[0m".format(sys.argv[1]))
        sys.exit(1)
    # Turn the percentage into a row count; rows before it train, the rest test.
    split_row = round(dataset.shape[0] * (data_train / 100))
    dataset_train = dataset.iloc[:split_row]
    dataset_test = dataset.iloc[split_row:]
    return dataset_train, dataset_test
def get_hidden_weights(topology, input_len):
    """
    Creates the weights that will be given to the multilayer-perceptron.

    Parameters:
        topology: tuple with the number of neurons of each layer, in order.
        input_len: positive int, number of inputs fed to the first layer.

    Returns:
        A list with one 2-D numpy array per layer. The first array has shape
        (topology[0], input_len); every following array has shape
        (topology[i], topology[i - 1] + 1). Values are uniform random numbers
        in [0, 1) scaled by sqrt(1 / number_of_columns).

    The global numpy random generator is re-seeded with 42 on every call, so
    the returned weights are reproducible (and numpy's global random state is
    reset as a side effect).

    Exits the process with status 1 when an argument has the wrong type.
    """
    if not isinstance(topology, tuple):
        print("\033[1m\033[91mError: get_hidden_weights. topology must be a tuple.\n\033[0m")
        sys.exit(1)
    # bool is a subclass of int, so it is rejected explicitly.
    if isinstance(input_len, bool) or not isinstance(input_len, int) or input_len <= 0:
        print("\033[1m\033[91mError: get_hidden_weights. input_len must be a positive int.\n\033[0m")
        sys.exit(1)
    np.random.seed(42)
    weights = []
    columns = input_len
    for neurons in topology:
        weights.append(np.random.rand(neurons, columns) * np.sqrt(1 / columns))
        # Later layers get one column more than the previous layer's neuron
        # count (presumably a bias input; confirm against MultilayerPerceptron).
        columns = neurons + 1
    return weights
def give_backp_format(data, opt_outputs):
    """
    Splits a dataset into the feature matrix and the one-hot encoded targets
    used to train the multilayer.

    Parameters:
        data: DataFrame whose first column is the label and whose remaining
              columns are the features.
        opt_outputs: numpy array with the possible label values; the position
                     of a label in it is its column in the encoded target.

    Returns:
        (X, Y_format): a copy of the feature columns as a numpy array, and a
        float array of shape (rows, opt_outputs.size) holding 1.0 at the
        column of each row's label. Rows whose label is not found in
        opt_outputs stay all zeros.
    """
    features = np.copy(data.iloc[:, 1:].to_numpy())
    labels = np.copy(data.iloc[:, 0].to_numpy())
    encoded = np.zeros((labels.shape[0], opt_outputs.size))
    for row, label in enumerate(labels):
        positions = np.nonzero(opt_outputs == label)[0]
        if positions.size > 0:
            # Only the first matching position is marked.
            encoded[row, positions[0]] = 1.0
    return features, encoded
def normalize_values(X_train, X_test):
    """
    Normalizes the independent variables in place and returns the mean, max
    and min of every feature.

    Each feature (column) is rescaled as (value - mean) / (max - min), where
    mean, max and min are computed on X_train only and then applied to both
    X_train and X_test.

    Parameters:
        X_train: 2-D numpy array (rows are samples, columns are features).
        X_test: 2-D numpy array with the same columns as X_train.

    Both arrays are overwritten with the normalized values; they are not
    returned. Because the result is written back into the given arrays, they
    are assumed to have a float dtype (an integer array would truncate).

    Returns:
        (mean_list, max_list, min_list): three 1-D float arrays with one
        entry per feature, as measured on X_train before normalization.

    A feature that is constant in X_train (max == min) is only centred: its
    divisor is taken as 1 instead of 0, so no NaN/inf values are produced.
    The returned max and min are still the measured ones.
    """
    mean_list = np.asarray(X_train.mean(axis=0), dtype=float)
    max_list = np.asarray(X_train.max(axis=0), dtype=float)
    min_list = np.asarray(X_train.min(axis=0), dtype=float)
    # Guard against a zero range so constant features do not divide by zero.
    spread = max_list - min_list
    spread[spread == 0] = 1.0
    # Slice assignment keeps the caller's arrays updated in place.
    X_train[...] = (X_train - mean_list) / spread
    X_test[...] = (X_test - mean_list) / spread
    return mean_list, max_list, min_list
def predict(X, multilayer, opt_outputs):
    """
    Runs the network on every row of X and collects its answers.

    Parameters:
        X: 2-D numpy array, one input sample per row.
        multilayer: object exposing forward_propagation(sample), which must
                    return one value per entry of opt_outputs.
        opt_outputs: numpy array with the label attached to each output.

    Returns:
        (Y_hat_num, Y_hat_str): a float array of shape
        (rows, opt_outputs.size) with the raw outputs of every sample, and an
        object array with the label whose output is the largest (the first
        one in case of a tie).
    """
    sample_count = X.shape[0]
    Y_hat_num = np.zeros((sample_count, opt_outputs.size), dtype=float)
    Y_hat_str = np.zeros(sample_count, dtype=object)
    for row, sample in enumerate(X):
        outputs = multilayer.forward_propagation(sample)
        Y_hat_num[row] = outputs
        best_positions = np.nonzero(outputs == max(outputs))[0]
        Y_hat_str[row] = opt_outputs[best_positions[0]]
    return Y_hat_num, Y_hat_str
def cost(Y, Y_hat, opt_outputs):
    """
    Computes the binary cross-entropy error between targets and predictions.

    Parameters:
        Y: 2-D array of targets, one row per sample and one column per output.
        Y_hat: 2-D array of predicted values with the same shape as Y.
        opt_outputs: numpy array of the possible outputs; only its size is
                     used, as the number of output columns to evaluate.

    Returns:
        The cross-entropy averaged over the samples of each output and then
        over the outputs (lower is better).

    Predictions are clipped to [1e-15, 1 - 1e-15] before taking logarithms,
    so a prediction of exactly 0 or 1 yields a large but finite cost instead
    of inf/NaN.
    """
    epsilon = 1e-15
    output_count = opt_outputs.size
    targets = np.asarray(Y, dtype=float)
    predictions = np.clip(np.asarray(Y_hat, dtype=float), epsilon, 1.0 - epsilon)
    losses = -(targets * np.log(predictions) + (1.0 - targets) * np.log(1.0 - predictions))
    # Mean over samples for each output, then mean over the outputs.
    per_output = losses[:, :output_count].mean(axis=0)
    return per_output.sum() / output_count
def display_graph(X_axis, train_cost, train_acc, test_cost, test_acc):
    """
    Shows the learning curves of the algorithm in a two-panel figure.

    The left panel plots the train and test cost against X_axis, the right
    panel plots the train and test accuracy. Each title reports the last
    value of both series (accuracies are shown multiplied by 100).
    The call blocks in plt.show() until the window is handled by matplotlib.
    """
    shared_look = {"alpha": 0.75, "linewidth": 2}
    _, (cost_panel, acc_panel) = plt.subplots(1, 2)

    # Left panel: cost curves.
    cost_panel.plot(X_axis, train_cost, color="darkred", label="Train Cost", **shared_look)
    cost_panel.plot(X_axis, test_cost, color="salmon", label="Test Cost", **shared_look)
    cost_panel.set_xlabel("Epochs")
    cost_panel.set_ylabel("Cost")
    cost_title = "Train cost: {:.4f} Test cost: {:.4f}".format(train_cost[-1], test_cost[-1])
    cost_panel.set_title(cost_title)
    cost_panel.legend()
    cost_panel.grid()

    # Right panel: accuracy curves.
    acc_panel.plot(X_axis, train_acc, color="darkcyan", label="Train Accuracy", **shared_look)
    acc_panel.plot(X_axis, test_acc, color="skyblue", label="Test Accuracy", **shared_look)
    acc_panel.set_xlabel("Epochs")
    acc_panel.set_ylabel("Accuracy")
    acc_title = "Train accuracy: {:.4f}% Test accuracy: {:.4f}%".format(train_acc[-1] * 100, test_acc[-1] * 100)
    acc_panel.set_title(acc_title)
    acc_panel.legend()
    acc_panel.grid()

    plt.show()
def train_model(opt_outputs, Y_train_str, Y_test_str, X_train, Y_train, X_test, Y_test, alpha, max_iter, network=None):
    """
    Trains the multilayer-perceptron and records its metrics after each epoch.

    Every epoch calls back_propagation once with the whole training set, then
    evaluates cost and accuracy on both the training and the test set. The
    metrics are printed, written to "historic.txt" (overwritten at the start
    of each run) and finally plotted with display_graph.

    Parameters:
        opt_outputs: numpy array with the possible labels.
        Y_train_str, Y_test_str: true labels of the train / test samples.
        X_train, X_test: feature matrices of the train / test samples.
        Y_train, Y_test: encoded targets matching opt_outputs.
        alpha: value forwarded to back_propagation as its third argument.
        max_iter: number of epochs to run.
        network: object to train; it must expose back_propagation and
                 forward_propagation. When omitted, the module-level
                 `multilayer` variable is used, as earlier versions of this
                 function did.

    Returns:
        None. When max_iter is 0 only the file header is written and no graph
        is shown, since there is nothing to plot.
    """
    if network is None:
        # Backward compatibility: callers that do not pass the network rely
        # on the global created by the script entry point.
        network = multilayer
    report = "epoch {}/{} - loss: {:.4f} - val_loss: {:.4f} - acc: {:.4f}% - val_acc: {:.4f}%"
    index_list = []
    train_cost_list = []
    train_acc_list = []
    test_cost_list = []
    test_acc_list = []
    # The history file is opened once for the whole run instead of per epoch.
    with open("historic.txt", "w") as history:
        history.write("An historic of the metric obtained during training:\n")
        for epoch in range(max_iter):
            network.back_propagation(X_train, Y_train, alpha)
            Y_hat_train_num, Y_hat_train_str = predict(X_train, network, opt_outputs)
            Y_hat_test_num, Y_hat_test_str = predict(X_test, network, opt_outputs)
            train_cost = cost(Y_train, Y_hat_train_num, opt_outputs)
            test_cost = cost(Y_test, Y_hat_test_num, opt_outputs)
            train_acc = accuracy_score(Y_train_str, Y_hat_train_str)
            test_acc = accuracy_score(Y_test_str, Y_hat_test_str)
            index_list.append(epoch)
            train_cost_list.append(train_cost)
            train_acc_list.append(train_acc)
            test_cost_list.append(test_cost)
            test_acc_list.append(test_acc)
            line = report.format(epoch + 1, max_iter, train_cost, test_cost, train_acc * 100, test_acc * 100)
            print(line)
            history.write(line + "\n")
            # Flush so the file can be followed while training is running.
            history.flush()
    if index_list:
        display_graph(index_list, train_cost_list, train_acc_list, test_cost_list, test_acc_list)
    return
def save_weights(multilayer, mean_list, max_list, min_list):
    """
    Writes the model weights and the normalization values to 'weights.txt'.

    File layout (plain text, values separated by single spaces):
        - first line: multilayer.get_size()
        - per layer: a line "<get_neurons()> <get_dendrites()>" followed by
          one line per perceptron with its weights
        - "Mean: ", "Max: " and "Min: " lines with the given lists
    Every value is followed by a space and the last line has no newline.
    A confirmation message is printed once the file has been written.
    """
    def spaced(values):
        # Each value is followed by one space, the last one included.
        return "".join(str(value) + ' ' for value in values)

    with open('weights.txt', 'w') as out:
        out.write(str(multilayer.get_size()) + '\n')
        for layer in multilayer.get_layers():
            shape_line = ' '.join((str(layer.get_neurons()), str(layer.get_dendrites())))
            out.write(shape_line + '\n')
            for perceptron in layer.get_perceptrons():
                out.write(spaced(perceptron.get_weights()) + '\n')
        out.write("Mean: " + spaced(mean_list))
        out.write("\nMax: " + spaced(max_list))
        out.write("\nMin: " + spaced(min_list))
    print("\033[1m\nDone! weights.txt file has been created and saved.\n\033[0m")
    return
if __name__ == '__main__':
    # Load the csv given on the command line and split it into train / test.
    train, test = read_dataset()

    # The first column holds the label; its distinct values are the outputs.
    opt_outputs = train.iloc[:, 0].unique()
    input_count = train.shape[1] - 1

    # Two hidden layers (10 and 5 neurons) plus one output neuron per label.
    topology = (10, 5, opt_outputs.size)
    hidden_weights = get_hidden_weights(topology, input_len=input_count)
    # NOTE: train_model reads this module-level name, so it must stay `multilayer`.
    multilayer = MultilayerPerceptron(input_count, hidden_weights)

    # Features and encoded targets, plus the raw labels used for accuracy.
    X_train, Y_train = give_backp_format(train, opt_outputs)
    X_test, Y_test = give_backp_format(test, opt_outputs)
    Y_train_str = np.copy(train.iloc[:, 0].to_numpy())
    Y_test_str = np.copy(test.iloc[:, 0].to_numpy())

    # X_train and X_test are normalized in place; the statistics are kept so
    # they can be stored next to the weights.
    mean_list, max_list, min_list = normalize_values(X_train, X_test)

    train_model(
        opt_outputs,
        Y_train_str,
        Y_test_str,
        X_train,
        Y_train,
        X_test,
        Y_test,
        alpha=0.01,
        max_iter=70,
    )
    save_weights(multilayer, mean_list, max_list, min_list)
    sys.exit(0)