run_prl.py
import sys
import json
import numpy as np
from optparse import OptionParser
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, normalize
from prl.prl import *
from prl.genF import *
from prl.genP import *
from prl.evaluation import *
from prl.solvers import *

#LOGGER SETUP
import logging
logging.basicConfig(level=logging.INFO,
                    format="[%(asctime)s] %(filename)s - %(message)s",
                    datefmt='%H:%M:%S-%d%m%y')


def manage_options():
    """Manages the command line options.

    :returns: a dictionary containing the options and their associated values
    :rtype: dictionary
    """
    parser = OptionParser(usage="usage: %prog [options] dataset_file", version="%prog 1.0")
    parser.add_option("-s", "--seed", dest="seed", default=42, help="Pseudo-random seed for replicability", type="int")
    parser.add_option("-t", "--test_size", dest="test_size", default=.3, help="Test set size as a fraction in [0,1]", type="float")
    parser.add_option("-n", "--normalize", dest="normalize", default=1, help="How the instances have to be normalized (default: 1) - 0: none, 1: MinMax scaling, 2: L2 normalization", type="int")
    parser.add_option("-c", "--config_file", dest="config_file", default="./config/config.json", help="Configuration file")
    parser.add_option("-v", "--verbose", dest="verbose", default=False, help="Verbose output", action="store_true")
    (options, args) = parser.parse_args()
    if len(args) == 0:
        parser.error("Missing required argument: dataset_file")
    out_dict = vars(options)
    out_dict["dataset"] = args[0]
    return out_dict

#INPUT
options = manage_options()
logging.info("Options: %s" %options)
#
#LOADING DATA
X, y = load_svmlight_file(options["dataset"])
X, y = X.toarray(), y.astype(int)
# maps labels into the range 0,..,m-1
unique_y = np.unique(y)
dim = len(unique_y)
map_y = dict(zip(unique_y, range(len(unique_y))))
y = np.array([map_y[i] for i in y])
#TRAINING-TEST SET SPLIT
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=options["test_size"], random_state=options["seed"])
if options["normalize"] == 1:
    scaler = MinMaxScaler()
    scaler.fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)
elif options["normalize"] == 2:
    Xtr = normalize(Xtr)
    Xte = normalize(Xte)
#
#LOAD CONFIGURATION FILE
with open(options['config_file'], "r") as f:
    data = json.load(f)

logging.info("Configuration: %s" %data)

# dynamically resolve the column generator class named in the configuration
if "feat_gen" in data:
    genf_class = getattr(__import__("prl.genF", fromlist=[data['feat_gen']]), data['feat_gen'])
    gen_col = genf_class(Xtr, *data['feat_gen_params'])
elif "kernel_gen" in data:
    genk_class = getattr(__import__("prl.genK", fromlist=[data['kernel_gen']]), data['kernel_gen'])
    gen_col = genk_class(*data['kernel_gen_params'])

if data["pref_generator"] == "micro":
    gen_pref_training = GenMicroP(Xtr, ytr)
    gen_pref_test = GenMicroP(Xte, yte)
else: # macro preference generator
    gen_pref_training = GenMacroP(Xtr, ytr)
    gen_pref_test = GenMacroP(Xte, yte)

budget = data["columns_budget"]
iterations = data["iterations"]
solver_class = getattr(__import__("prl.solvers", fromlist=[data['solver']]), data['solver'])
solver = solver_class(*data['solver_params'])
prl_alg = getattr(__import__("prl.prl", fromlist=[data['algorithm']]), data['algorithm'])
#
#TRAINING PRL
prl = prl_alg(gen_pref_training, gen_col, dim, budget, solver)
prl.fit(iterations, options["verbose"])
#
#EVALUATION
acc, conf = accuracy(prl, gen_pref_test)
bacc, _ = balanced_accuracy(prl, gen_pref_test, conf)
logging.info("Accuracy: %.3f" %acc)
logging.info("Balanced accuracy: %.3f" %bacc)
logging.info("Confusion matrix:\n%s" %conf)
#
logging.shutdown()
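#
# USAGE SKETCH (not part of the original script): a minimal example of how this
# script might be invoked and what the configuration file could look like. The
# keys mirror the ones read above ("feat_gen"/"kernel_gen" with their *_params,
# "pref_generator", "columns_budget", "iterations", "solver", "solver_params",
# "algorithm"); the dataset path, class names, and parameter values below are
# hypothetical placeholders, not values taken from this repository.
#
#   python run_prl.py -s 42 -t 0.3 -n 1 -c ./config/config.json dataset.svmlight
#
#   # example config.json skeleton (class names must exist in prl.genF / prl.solvers / prl.prl)
#   {
#       "feat_gen": "<class in prl.genF>",
#       "feat_gen_params": [],
#       "pref_generator": "macro",
#       "columns_budget": 100,
#       "iterations": 1000,
#       "solver": "<class in prl.solvers>",
#       "solver_params": [],
#       "algorithm": "<class in prl.prl>"
#   }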