-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain_evaluation.py
executable file
·116 lines (106 loc) · 5.04 KB
/
main_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import numpy as np
import os
import random
from datetime import datetime
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
from modules.other.utils import read_data, RandomTrial
from modules.other import hyperparameters_ciarp2024
from modules.models import ModelSNNPC
DATASET_LIST = ["Base", "Variant I", "Variant II", "Variant III", "Variant IV", "Variant V"]
NUM_TRIALS = 100
BEGIN_TRIAL = 0
BASE_SEED = 42
PATH = os.path.dirname(os.path.realpath(__file__))
METRICS_NAME_GLOBAL = ["accuracy", "precision", "recall", "fpr", "f1_score","auc"]
METRICS_NAME_5FPR = ["accuracy@5FPR","precision@5FPR", "recall@5FPR", "fpr@5FPR", "f1_score@5FPR"]
METRICS_FAIRNESS = ["fpr_ratio_age", "fpr_ratio_income", "fpr_ratio_employment"]
HYPERPARAMETERS = hyperparameters_ciarp2024.P2_S20
#HYPERPARAMETERS = hyperparameters_ciarp2024.P20_S20
#HYPERPARAMETERS = hyperparameters_ciarp2024.P200_S20
#HYPERPARAMETERS = hyperparameters_ciarp2024.P2_S50
#HYPERPARAMETERS = hyperparameters_ciarp2024.P20_S50
#HYPERPARAMETERS = hyperparameters_ciarp2024.P200_S50
EXPERIMENT_NAME = f"CIARP2024-P{HYPERPARAMETERS['population']}-S{HYPERPARAMETERS['step']}-{NUM_TRIALS}trials-begin{BEGIN_TRIAL}"
FIXED_DATE = None
def dataset_loop(train_dfs, test_dfs, dataset_name, trial_number, seed, path, runs):
x_train = train_dfs[dataset_name].drop(columns=["fraud_bool"])
y_train = train_dfs[dataset_name]["fraud_bool"]
x_test = test_dfs[dataset_name].drop(columns=["fraud_bool"])
y_test = test_dfs[dataset_name]["fraud_bool"]
num_classes = len(np.unique(y_train))
num_features = len(x_train.columns)
class_weights = (1-HYPERPARAMETERS['weight'], HYPERPARAMETERS['weight'])
model = ModelSNNPC(
num_features=num_features,
num_classes=num_classes,
class_weights=class_weights,
betas=HYPERPARAMETERS['beta'],
slope=HYPERPARAMETERS['slope'],
thresholds=HYPERPARAMETERS['threshold'],
population=HYPERPARAMETERS['population'],
batch_size=HYPERPARAMETERS['batch'],
num_epochs=HYPERPARAMETERS['epoch'],
num_steps=HYPERPARAMETERS['step'],
adam_betas=HYPERPARAMETERS['adam_beta'],
learning_rate=HYPERPARAMETERS['learning_rate'],
verbose=0
)
model.fit(x_train, y_train)
predictions, targets = model.predict(x_test, y_test)
metrics = model.evaluate(targets, predictions)
metrics_aequitas = model.evaluate_business_constraint(targets, predictions)
metrics.update(metrics_aequitas)
fairness_age = model.evaluate_fairness(x_test, targets, predictions, "customer_age", 50)
metrics.update({k+"_age": v for k, v in fairness_age.items()})
fairness_income = model.evaluate_fairness(x_test, targets, predictions, "income", 0.5)
metrics.update({k+"_income": v for k, v in fairness_income.items()})
fairness_employement = model.evaluate_fairness(x_test, targets, predictions, "employment_status", 3)
metrics.update({k+"_employment": v for k, v in fairness_employement.items()})
results = {}
results["dataset"] = dataset_name
results["trial"] = trial_number
results["seed"] = seed
for metric in METRICS_NAME_GLOBAL:
results[metric] = metrics[metric]
for metric in METRICS_NAME_5FPR:
results[metric] = metrics_aequitas[metric]
for metric in METRICS_FAIRNESS:
results[metric] = metrics[metric]
csv_row = ','.join([str(x) for x in results.values()])
with open(path, "a") as f:
f.write(f"{csv_row}\n")
prev_runs = runs.get(dataset_name, [])
prev_runs.append(results)
runs[dataset_name] = prev_runs
return runs
def simulation(datasets, train_dfs, test_dfs, path="./results.csv"):
np.random.seed(BASE_SEED)
seeds = np.random.choice(list(range(1_000_000)), size=NUM_TRIALS, replace=False)
runs = {}
for trial in range(NUM_TRIALS):
seed = seeds[trial]
trial_number = trial
trial = RandomTrial(seed=seed)
if trial_number < BEGIN_TRIAL:
print(f"Skipping trial {trial_number} – seed {seed}")
continue
for dataset_name in datasets.keys():
print(f"Running trial {trial_number} with seed {seed} on dataset {dataset_name}")
dataset_loop(train_dfs, test_dfs, dataset_name, trial_number, seed, path, runs)
def main():
base_path = f"{PATH}/../data/"
_, datasets, train_dfs, test_dfs = read_data(base_path, DATASET_LIST)
if not FIXED_DATE:
date = datetime.now().strftime("%Y%m%d_%H%M%S")
else:
date = FIXED_DATE
experiment_dir = f"{PATH}/results/{date}-{EXPERIMENT_NAME}"
results_path = f"{experiment_dir}/results.csv"
os.makedirs(experiment_dir, exist_ok=True)
if not os.path.exists(results_path):
with open(results_path, "w") as f:
f.write("dataset,trial,seed,accuracy,precision,recall,fpr,f1_score,auc,accuracy@5FPR,precision@5FPR,recall@5FPR,fpr@5FPR,f1_score@5FPR,fpr_ratio_age,fpr_ratio_income,fpr_ratio_employment\n")
simulation(datasets, train_dfs, test_dfs, path=results_path)
if __name__ == "__main__":
main()