-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
52 lines (39 loc) · 1.54 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import pandas as pd
from train.train import train_kfold
from preprocess.preprocessing import preprocess_data
def initialize_results():
    """Build the empty containers used to collect results.

    Returns:
        tuple: A new empty dict and an empty pandas DataFrame whose columns
        are the data-folder name followed by the reported metrics.
    """
    metric_columns = [
        "Data Folder",
        "Accuracy",
        "F1-macro",
        "F1-weighted",
        "AUC score",
        "PRC score",
        "Uncertainty",
    ]
    return {}, pd.DataFrame(columns=metric_columns)
def preprocess_and_train(data_folder, model_path, *, testonly=False):
    """Preprocess one dataset folder and run k-fold training on it.

    Args:
        data_folder (str): Name of the data folder to process. It is passed
            as-is to ``preprocess_data`` and joined onto ``../data`` to build
            the path handed to ``train_kfold``.
        model_path (str): Path to save model checkpoints; forwarded to
            ``train_kfold``.
        testonly (bool): Forwarded unchanged as the third positional argument
            of ``train_kfold``. Defaults to ``False``, the value that used to
            be hard-coded here, so existing two-argument calls are unaffected.
            NOTE(review): presumably switches to evaluation without
            training -- confirm against ``train_kfold``.

    Returns:
        dict: Results of the training, as returned by ``train_kfold``.
    """
    print(f"Processing {data_folder}...")
    preprocess_data(data_folder)

    # Relative path: resolved against the current working directory.
    data_folder_path = os.path.join("..", "data", data_folder)
    return train_kfold(data_folder_path, model_path, testonly)
def main():
    """Run preprocessing and k-fold training for each configured dataset."""
    # Checkpoint location handed to every training run (relative path).
    checkpoint_dir = os.path.join("..", "results", "checkpoints")
    # Add more folder names to this list to process multiple datasets.
    dataset_names = ["COADREAD"]

    # results_df is created but not yet filled in by this function.
    results, results_df = initialize_results()

    for name in dataset_names:
        results[name] = preprocess_and_train(name, checkpoint_dir)

    # Further code to handle results, save to files, etc. can be added here.
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()