-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhyperparaTune.py
81 lines (69 loc) · 2.8 KB
/
hyperparaTune.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Author: Laurin Koch
# Date: 2021
"""
Script to tune hyperparameter such as learning rate, hidden ratio, etc.
"""
from Kitsune import Kitsune
import numpy as np
import os
import pandas as pd
import pickle
import csv
from sklearn.metrics import f1_score
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
# --- Paths and training configuration ---------------------------------------
data_dir = 'data_sdc11073/results'
# Packet capture used for both the training (grace) and execution phases.
pcapng_traintestData_path = os.path.join(data_dir, 'capture_OPtable_traintest2.pcapng')
# Ground-truth labels ('anomaly' column, one row per packet) for the capture above.
csv_traintestData_gt_path = os.path.join(data_dir, 'capture_OPtable_traintest2_gt.csv')
# CSV log with one (loss, params, iteration) row per optimization trial.
csv_outfile = os.path.join(data_dir, 'hyperpara_optimization_infos.csv')
# No cap on processed packets. np.inf, not np.Inf: the capitalized alias
# was deprecated and removed in NumPy 2.0.
packet_limit = np.inf
FMgrace = 5000   # packets used to learn Kitsune's feature mapping
ADgrace = 60000  # packets used to train the anomaly detector
ITERATION = 0    # global trial counter, incremented inside objective()
def objective(space):
    """Hyperopt objective: run Kitsune with the sampled hyperparameters and
    return ``1 - F1`` measured on the execution (post-training) phase.

    Parameters
    ----------
    space : dict
        Sampled hyperparameters with keys 'max_AE', 'learning_rate',
        'hidden_ratio' and 'sensitivity'.

    Returns
    -------
    dict
        Hyperopt result: {'loss', 'params', 'iteration', 'status'}.
    """
    global ITERATION
    ITERATION += 1

    # hp.quniform samples floats (e.g. 7.0); the maximum autoencoder size
    # must be an integer, so cast explicitly before handing it to Kitsune.
    NIDS = Kitsune(pcapng_traintestData_path,
                   packet_limit,
                   max_autoencoder_size=int(space['max_AE']),
                   FM_grace_period=FMgrace,
                   AD_grace_period=ADgrace,
                   learning_rate=space['learning_rate'],
                   hidden_ratio=space['hidden_ratio'],
                   sensitivity=space['sensitivity'])

    # Process the whole capture; proc_next_packet() returns -1 at end of file.
    packet_count = NIDS.packet_count
    for _ in range(packet_count):
        if NIDS.proc_next_packet() == -1:
            break

    # NIDS.logs appears to hold per-packet entries whose second element is the
    # binary anomaly prediction -- TODO confirm log format against Kitsune.
    preds = [item[1] for item in NIDS.logs]

    # Ground truth for the execution phase only: skip the FM + AD grace packets.
    gt_data = pd.read_csv(csv_traintestData_gt_path, usecols=['anomaly'])
    exec_start_idx = FMgrace + ADgrace
    gt_exec = gt_data.anomaly.tolist()[exec_start_idx:]

    f1 = f1_score(gt_exec, preds)
    loss = 1 - f1

    # Append this trial's result immediately so progress survives interrupts.
    with open(csv_outfile, 'a', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow([loss, space, ITERATION])

    return {'loss': loss, 'params': space, 'iteration': ITERATION,
            'status': STATUS_OK}
# Hyperparameter search space: each hyperparameter has its own prior
# probability distribution for the TPE sampler.
space = {
    'max_AE': hp.quniform('max_AE', 1, 20, 1), # discrete uniform distribution
    'learning_rate': hp.loguniform('learning_rate', np.log(0.001), np.log(0.2)), # logarithmic uniform distribution
    'hidden_ratio': hp.uniform('hidden_ratio', 0.0, 1.0),
    'sensitivity': hp.uniform('sensitivity', 0.0, 1.0)
}
# write headers to csv file ('w' mode: overwrites any previous run's log)
with open(csv_outfile, 'w', newline='') as fout:
    writer = csv.writer(fout, delimiter=',')
    writer.writerow(['loss', 'params', 'iteration'])
# result history holding the dictionary returned from the objective function
trials = Trials()
# algorithm used for optimization: Tree Parzen Estimator (TPE)
best_hyperpara = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=5, trials=trials)
print(best_hyperpara)
# store best parameters and the full trial history to evaluate them later in
# a jupyter notebook (NOTE: pickled Trials objects are hyperopt-version bound)
with open('models/hyperpara_24092021.pkl', 'wb') as f:
    pickle.dump([best_hyperpara, trials], f)