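"""MSE.py

Least-squares (MSE) linear classifier trained with stochastic gradient
descent and evaluated with M-fold cross validation on the Algerian forest
fires dataset.
"""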
import argparse, random
import numpy as np
import pandas as pd
from statistics import mean
from utils.read_data import read_data
from utils.metrics import metrics, plot_val_cf_matrix
from sklearn.preprocessing import StandardScaler

parser = argparse.ArgumentParser()
parser.add_argument('--M', type=int, default=4, help='M-fold cross validation')
parser.add_argument('--b', type=float, default=1.0, help='MSE clf parameter')
parser.add_argument('--epoch', type=int, default=200, help='# epochs trained')
parser.add_argument('--standardization', action='store_true', help='use standardization')
parser.add_argument('--feat_reduction', action='store_true', help='drop four least contributing features')
parser.add_argument('--extra_feat', action='store_true', help='create extra features utilizing Date')
parser.add_argument('--feat54', action='store_true', help='54 features')
parser.add_argument('--plot_title', default='', help='title for cf_matrix plot')
args = parser.parse_args()
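
# NOTE: --feat_reduction, --extra_feat, and --feat54 are parsed but never read
# in this script; they appear to be shared flags handled elsewhere in the project.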

def J_value(data, label, w):
    """MSE loss J(w) = (1/N) * sum_i (z_i * <w, x_i> - b)^2,
    where z_i = +1 if label_i == 1 and -1 otherwise.
    """
    J = 0
    N = len(label)
    for i in range(N):
        z = 1 if label.iloc[i] == 1 else -1
        x = data[i]
        L = np.dot(w, x) * z
        J += (L - args.b) ** 2
    J /= N
    return J

def predict(data, w):
    result = []
    for i in range(len(data)):
        x = data[i]
        if np.dot(w, x) > 0:
            result.append(1)
        else:
            result.append(0)
    return result

def init_train_param(D):
    """
    Args:
        D: # features
    """
    # weight vector with small random positive entries
    w = np.array([random.uniform(0.001, 0.1) for _ in range(D)])
    it = 0                  # iteration counter
    lr = 100 / (1000 + it)  # initial learning rate (annealed during training)
    not_l_s = False         # not linearly separable
    c_c = 0                 # correctly classified
    w_vec, J_vec = [None] * 500, [None] * 500  # weights/losses for iterations 9501..10000
    return w, it, lr, not_l_s, c_c, w_vec, J_vec
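
# The SGD update in train() below follows from differentiating one term of J:
#   d/dw (z * <w, x> - b)^2 = 2 * (z * <w, x> - b) * z * x,
# scaled by 1/N to match the averaged loss in J_value().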
def train(X, y, N, idx, w, it, lr, not_l_s, c_c, w_vec, J_vec):
    """
    Args:
        idx: training dataframe index for shuffling use
        it: iteration counter
        not_l_s: not linearly separable (bool); default: False
        c_c: # correctly classified pts; default: 0
        w_vec: list storing weight vectors
        J_vec: list storing loss values
    Return: weight vector that gives the lowest loss
    """
    w_hat = w  # fallback in case neither stopping condition below is reached
    for epoch in range(args.epoch):
        # Shuffle at each epoch
        idx = random.sample(list(idx), N)
        if it >= 20000:
            not_l_s = True
            break
        for i in idx:
            if c_c == N:
                # every training point is classified correctly: stop early
                w_hat = w
                print("data is linearly separable")
                print("w_hat=", w_hat)
                print("J=", J_value(X, y, w_hat))
                return w_hat
            it += 1
            lr = 100 / (1000 + it)  # anneal the learning rate
            # keep the weights seen during iterations 9501..10000
            if 9501 <= it <= 10000:
                w_vec[it - 9501] = w
            if it == 10000:
                break
            x = X[i]
            z = 1 if y.iloc[i] == 1 else -1
            Delta = np.dot(w, x) * z
            if Delta <= 0:
                c_c = 0  # misclassified: reset the streak
            else:
                c_c += 1
            w = w - lr * 2 / N * (Delta - args.b) * z * x
    if not_l_s:
        # not separable within 20000 iterations: return the stored weight
        # vector with the lowest loss
        for i, w in enumerate(w_vec):
            J_vec[i] = J_value(X, y, w)
        index = np.argmin(J_vec)
        w_hat = w_vec[index]
        print("w_hat=", w_hat)
        print("J=", J_vec[index])
    return w_hat
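
# The cross validation in main() hard-codes 46-row folds, which assumes the
# training CSV holds 4 * 46 = 184 rows; running with --M other than 4 would
# require generalizing the slicing (e.g., fold size = len(X_tr) // M).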
def main():
    X_tr, y_tr = read_data('datasets/algerian_fires_train.csv')
    X_test, y_test = read_data('datasets/algerian_fires_test.csv')
    # drop first column ("Date" feature)
    X_tr, X_test = X_tr.iloc[:, 1:], X_test.iloc[:, 1:]
    M = args.M
    F1_result, Acc_result = [0] * M, [0] * M
    TP, TN, FP, FN = [0] * M, [0] * M, [0] * M, [0] * M
    scaler = StandardScaler()
    for m in range(M):
        # fold m (46 rows) is the validation split; the rest is training
        X_val, y_val = X_tr.iloc[46*m:46*(m+1)], y_tr.iloc[46*m:46*(m+1)]
        if m == 0:
            X_tr_prime, y_tr_prime = X_tr.iloc[46:], y_tr.iloc[46:]
        elif m == 1:
            X_tr_prime = pd.concat([X_tr.iloc[:46], X_tr.iloc[92:]])
            y_tr_prime = pd.concat([y_tr.iloc[:46], y_tr.iloc[92:]])
        elif m == 2:
            X_tr_prime = pd.concat([X_tr.iloc[:92], X_tr.iloc[138:]])
            y_tr_prime = pd.concat([y_tr.iloc[:92], y_tr.iloc[138:]])
        else:
            X_tr_prime, y_tr_prime = X_tr.iloc[:138], y_tr.iloc[:138]
        N = X_tr_prime.shape[0]
        idx = np.arange(N)  # shuffled inside train()
        D = X_tr_prime.shape[1]
        w, it, lr, not_linearly_separable, correctly_classified, w_vec, J_vec = \
            init_train_param(D)
        # standardize only when --standardization is passed (the flag's stated purpose)
        if args.standardization:
            X_tr_prime = scaler.fit_transform(X_tr_prime)
            X_val = scaler.transform(X_val)
        else:
            X_tr_prime, X_val = X_tr_prime.to_numpy(), X_val.to_numpy()
        w_hat = train(X_tr_prime, y_tr_prime, N, idx, w, it, lr,
                      not_linearly_separable, correctly_classified, w_vec, J_vec)
        y_val_pred = predict(X_val, w_hat)
        F1_result[m], Acc_result[m], TP[m], TN[m], FP[m], FN[m] = \
            metrics(y_val, y_val_pred, args.plot_title, work='val')
    print("Val F1_score=", mean(F1_result), "Val Accuracy=", mean(Acc_result))
    plot_val_cf_matrix(y_val, y_val_pred, args.plot_title,
                       mean(TP), mean(TN), mean(FP), mean(FN))

    print("Training with full dataset!")
    # recompute N and idx for the full training set (the values above still
    # refer to the last 138-row CV split)
    N = X_tr.shape[0]
    idx = np.arange(N)
    w, it, lr, not_linearly_separable, correctly_classified, w_vec, J_vec = \
        init_train_param(D)
    if args.standardization:
        X_tr = scaler.fit_transform(X_tr)
        X_test = scaler.transform(X_test)
    else:
        X_tr, X_test = X_tr.to_numpy(), X_test.to_numpy()
    w_hat = train(X_tr, y_tr, N, idx, w, it, lr,
                  not_linearly_separable, correctly_classified, w_vec, J_vec)
    y_test_pred = predict(X_test, w_hat)
    F1_score, Accuracy = metrics(y_test, y_test_pred, args.plot_title)
    print("Test F1_score=", F1_score, "Test Accuracy=", Accuracy)


if __name__ == '__main__':
    main()
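
# Example invocation (assumes datasets/algerian_fires_{train,test}.csv and the
# utils package are present; the --plot_title value is an arbitrary example):
#   python MSE.py --M 4 --b 1 --epoch 200 --standardization --plot_title "MSE"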