-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdtc_cli.py
67 lines (51 loc) · 2.56 KB
/
dtc_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import argparse
import os
from sklearn.model_selection import train_test_split
import category_encoders as ce
import pandas as pd
from config import DATASET_FOLDER_PATH, TEST_SIZE, RANDOM_STATE, MAX_DEPTH
from metrics import calculation
from algo.decisionTreeClassifier import DecisionTreeClassifier
from storage.metrics.metrics_saver import save_results, save_multilabel_matrix, save_matrix_to_csv
def main(dataset_filename):
    """Train and evaluate the decision tree classifier on one dataset file.

    The CSV is read from ``DATASET_FOLDER_PATH``; its last column is renamed
    to ``'class'`` and used as the target, while every other column is passed
    through ``category_encoders.OrdinalEncoder``. The data is split with
    ``TEST_SIZE`` / ``RANDOM_STATE``, a ``DecisionTreeClassifier`` limited to
    ``MAX_DEPTH`` is trained, and the resulting accuracies, confusion matrix,
    multilabel matrix and classification report are handed to the
    ``metrics_saver`` helpers.

    Args:
        dataset_filename: Name of the CSV file inside ``DATASET_FOLDER_PATH``.

    Raises:
        FileNotFoundError: If the resolved dataset path does not exist.
    """
    csv_path = os.path.join(DATASET_FOLDER_PATH, dataset_filename)
    if not os.path.exists(csv_path):
        raise FileNotFoundError(csv_path)

    frame = pd.read_csv(csv_path)

    # Whatever the file calls its last column, treat it as the target.
    header = frame.columns.tolist()
    header[-1] = 'class'
    frame.columns = header

    # Collect the class labels before the frame goes through the encoder.
    labels = sorted(frame['class'].unique())

    feature_columns = list(frame.columns[:-1])
    encoder = ce.OrdinalEncoder(cols=feature_columns)
    frame = encoder.fit_transform(frame)

    features = frame.drop(columns=['class'])
    target = frame['class']
    x_train, x_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
    )

    classifier = DecisionTreeClassifier(max_depth=MAX_DEPTH)
    classifier.train(x_train, y_train)
    train_predictions = classifier.predict(x_train)
    test_predictions = classifier.predict(x_test)

    # Export the fitted tree (presumably as Graphviz .dot files; see the
    # tree implementation for the output location).
    classifier.tree.make_dot_files()

    train_accuracy = calculation.accuracy_score(y_train, train_predictions)
    test_accuracy = calculation.accuracy_score(y_test, test_predictions)

    empty_confusion = calculation.build_matrix_dict(labels)
    conf_matrix = calculation.confusion_matrix(
        y_true=y_test,
        y_pred=test_predictions,
        matrix_dict=empty_confusion,
    )

    empty_multilabel = calculation.build_multilabel_matrix_dict(labels)
    multilabel_matrix_dict, multilabel_matrix = calculation.multilabel_matrix(
        y_true=y_test,
        y_pred=test_predictions,
        matrix_dict=empty_multilabel,
    )
    report = calculation.classification_report(multilabel_matrix_dict)

    save_results(
        {
            "dataset": dataset_filename,
            "accuracy_train": train_accuracy,
            "accuracy_test": test_accuracy,
            "classification_report": report,
        }
    )
    save_matrix_to_csv(conf_matrix, labels, "matrix.csv")
    save_multilabel_matrix(multilabel_matrix, "multilabel.csv")
if __name__ == '__main__':
    # Script entry point: take the dataset file name as the single positional
    # command-line argument and run the full pipeline on it.
    cli = argparse.ArgumentParser(
        description="Run the classification program with a specific dataset file.",
    )
    cli.add_argument(
        "dataset_filename",
        type=str,
        help="The name of the dataset file (e.g., iris.csv)",
    )
    main(cli.parse_args().dataset_filename)