-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathBaseline.py
98 lines (74 loc) · 4.99 KB
/
Baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score
from sklearn.cluster import KMeans
import numpy as np
import argparse
import logging
import torch
import os
from src.utils import setup_device, setup_logging_level, plot_baseline_training_metrics, hungarian_accuracy
from src.training_procedures import train_baseline_model
from src.import_utils import import_dataset_with_name
from src.BaselineModel import BaselineModel
def argument_parser(argv=None):
    """
    Parse the command-line options of the baseline script.

    The parser lets the user experiment with different parameters and datasets.

    :param argv: Optional list of argument strings to parse. When None (the
                 default), argparse falls back to ``sys.argv[1:]``, which is the
                 behaviour of the original zero-argument call.
    :return: The ``argparse.Namespace`` holding ``dataset_name``, ``use_cuda``
             and ``log_lvl``. ``use_cuda`` is kept as the string 'True'/'False'.
    """
    # dataset_name is a required *option* (--dataset_name), not a positional,
    # so the usage line spells it out that way.
    parser = argparse.ArgumentParser(
        usage='python Baseline.py --dataset_name DATASET_NAME [options]',
        description='This program allows to run the baseline and compute the different performance metrics.')

    parser.add_argument('--dataset_name', type=str, required=True,
                        choices=['mnist', 'ForestCoverType', 'LetterRecognition', 'HumanActivityRecognition',
                                 'Satimage', 'Pendigits', 'USCensus1990'],
                        help='The name of the dataset to import.')

    parser.add_argument('--use_cuda', type=str, default='True',
                        choices=['True', 'False'], required=False,
                        help='Set to True if you want the code to be run on your GPU. If set to False, code will run on CPU.')

    parser.add_argument('--log_lvl', type=str, default='info',
                        choices=['debug', 'info', 'warning'], required=False,
                        help='Change the log display level.')

    return parser.parse_args(argv)
if __name__ == '__main__':
    # Entry point: train the baseline classifier on the known classes, then
    # cluster the unlabeled / unknown-class samples with K-Means and report
    # clustering metrics.
    args = argument_parser()

    # --use_cuda is parsed as the string 'True' / 'False'.
    device = setup_device(use_cuda=args.use_cuda == 'True')
    setup_logging_level(args.log_lvl)

    # ====================== Step 1 - Import the datasets =======================
    logging.info("Importing %s...", args.dataset_name)
    (x_train, y_train, x_unlab, y_unlab, x_test, y_test, x_full, y_full,
     y_full_classifier, y_train_classifier, y_test_classifier,
     grouped_unknown_class_val, classifier_mapping_dict,
     cat_columns_indexes) = import_dataset_with_name(args.dataset_name, device)

    # Split the test set into samples whose label appears in y_train (known
    # classes) and samples whose label appears in y_unlab (unknown classes).
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
    known_mask = np.isin(y_test, y_train)
    unknown_mask = np.isin(y_test, y_unlab)
    x_test_known, y_test_known = x_test[known_mask], y_test[known_mask]
    x_test_unknown, y_test_unknown = x_test[unknown_mask], y_test[unknown_mask]

    # Map the known-class labels to contiguous indices 0..|C|-1.
    # `return_inverse` already gives, for every training sample, the index of
    # its label in the sorted unique classes.
    known_classes, y_train_mapped = np.unique(y_train, return_inverse=True)
    # Every label of y_test_known is present in known_classes (see known_mask),
    # so searchsorted returns its exact index in the sorted class array.
    y_test_known_mapped = np.searchsorted(known_classes, y_test_known)
    # ===========================================================================

    # =================== Step 2 - Define and train the model ===================
    model = BaselineModel(x_train.shape[1], len(known_classes), p_dropout=0.2).to(device)
    logging.info(model)

    losses_dict = train_baseline_model(model, device, x_train, y_train_mapped,
                                       x_test_known, y_test_known_mapped, num_epochs=50)

    fig_path = os.path.join('.', 'figures', str(args.dataset_name) + '_baseline_training_metric.svg')
    # Make sure the output directory exists before the figure is written.
    os.makedirs(os.path.dirname(fig_path), exist_ok=True)
    logging.info('Saving metrics training curves in %s', fig_path)
    plot_baseline_training_metrics(losses_dict, fig_path)
    # ===========================================================================

    # ============ Step 3 - Train a K-Means on the penultimate layer ============
    # Project the samples with the trained network in eval mode and without
    # gradient tracking; the model is switched back to train mode afterwards.
    # (presumably neural_net_forward returns the penultimate-layer activations
    # - confirm against BaselineModel)
    model.eval()
    with torch.no_grad():
        x_unlab_projection = model.neural_net_forward(x_unlab)
        x_test_unknown_projection = model.neural_net_forward(x_test_unknown)
    model.train()

    km = KMeans(n_clusters=len(np.unique(y_unlab)))
    # NOTE(review): K-Means is fitted independently on each split (the second
    # fit_predict re-fits the estimator from scratch). If the intent is to fit
    # on the unlabeled training data and only *predict* on the test data, use
    # km.predict(...) for the test split - confirm the intended protocol.
    km_y_test_unknown_pred = km.fit_predict(x_test_unknown_projection.cpu())
    km_y_unlab_pred = km.fit_predict(x_unlab_projection.cpu())
    # ===========================================================================

    # ==================== Step 4 - Evaluate the prediction =====================
    train_acc, train_bacc = hungarian_accuracy(y_unlab, km_y_unlab_pred)
    train_ari = adjusted_rand_score(y_unlab, km_y_unlab_pred)
    train_nmi = normalized_mutual_info_score(y_unlab, km_y_unlab_pred)
    logging.info("Train clustering accuracy: {:.3f} / balanced acc.: {:.3f} / ari.: {:.3f} / nmi.: {:.3f}".format(
        train_acc, train_bacc, train_ari, train_nmi))

    test_acc, test_bacc = hungarian_accuracy(y_test_unknown, km_y_test_unknown_pred)
    test_ari = adjusted_rand_score(y_test_unknown, km_y_test_unknown_pred)
    test_nmi = normalized_mutual_info_score(y_test_unknown, km_y_test_unknown_pred)
    logging.info("Test clustering accuracy: {:.3f} / balanced acc.: {:.3f} / ari.: {:.3f} / nmi.: {:.3f}".format(
        test_acc, test_bacc, test_ari, test_nmi))
    # ===========================================================================