Commit
add PDL model, benchmark comparison and example from parma
v1docq committed Nov 28, 2024
1 parent 5855e4a commit 7d6490a
Showing 9 changed files with 231 additions and 74 deletions.
77 changes: 37 additions & 40 deletions benchmark/benchmark_TSC.py
@@ -28,6 +28,7 @@ def __init__(self,
self.logger = logging.getLogger(self.__class__.__name__)

self.experiment_setup = experiment_setup
self.init_assumption = deepcopy(self.experiment_setup['initial_assumption'])
self.multi_TSC = MULTI_CLF_BENCH
self.uni_TSC = UNI_CLF_BENCH
if custom_datasets is None:
@@ -39,13 +40,13 @@ def __init__(self,
self.custom_datasets = custom_datasets

if use_small_datasets:
self.path_to_result = '/benchmark/results/time_series_uni_clf_comparasion.csv'
self.path_to_save = '/benchmark/results/ts_uni_classification'
self.path_to_result = 'time_series_uni_clf_comparasion.csv'
self.path_to_save = 'ts_uni_classification'
else:
self.path_to_result = '/benchmark/results/time_series_multi_clf_comparasion.csv'
self.path_to_save = '/benchmark/results/ts_multi_classification'
self.results_picker = ResultsPicker(
path=os.path.abspath(self.output_dir))
self.path_to_result = 'time_series_multi_clf_comparasion.csv'
self.path_to_save = 'ts_multi_classification'
self.output_dir = os.path.join(self.experiment_setup['output_folder'], self.path_to_save)
self.results_picker = ResultsPicker(path=os.path.abspath(self.output_dir))

def _run_model_versus_model(self, dataset_name, comparasion_dict):
approach_dict = {}
@@ -55,45 +56,42 @@ def _run_model_versus_model(self, dataset_name, comparasion_dict):
eval(dataset=dataset_name,
initial_assumption=comparasion_dict[approach],
finetune=self.experiment_setup['finetune'])
approach_dict.update({approach: result_dict['metrics']})
metric = result_dict['metrics'][self.experiment_setup['metric']][0]
approach_dict.update({approach: metric})
return approach_dict

def _run_industrial_versus_sota(self, dataset_name):
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.evaluate_loop(dataset_name, experiment_setup)
Accuracy(target, prediction).metric()
return Accuracy(target, prediction).metric()

def run(self):
self.logger.info('Benchmark test started')
basic_results = self.load_local_basic_results()
metric_dict = {}
for dataset_name in self.custom_datasets:
if isinstance(self.experiment_setup['initial_assumption'], dict):
metric = self._run_model_versus_model(dataset_name, self.experiment_setup['initial_assumption'])
model_name = list(self.experiment_setup['initial_assumption'].keys())
else:
metric = self._run_industrial_versus_sota()
model_name = 'Fedot_Industrial'
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, model_name] = metric
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
basic_results.to_csv(dataset_path)
gc.collect()
basic_path = os.path.join(
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
try:
if isinstance(self.init_assumption, dict):
model_name = list(self.init_assumption.keys())
metric = self._run_model_versus_model(dataset_name, self.init_assumption)
else:
                    metric = self._run_industrial_versus_sota(dataset_name)
model_name = 'Fedot_Industrial'
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, model_name] = metric
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
basic_results.to_csv(os.path.join(self.output_dir, self.path_to_result))
except Exception:
self.logger.info(f"{dataset_name} problem with eval")
self.logger.info("Benchmark test finished")

def finetune(self):
self.logger.info('Benchmark finetune started')
dataset_result = {}
for dataset_name in self.custom_datasets:
path_to_results = PROJECT_PATH + \
self.path_to_save + f'/{dataset_name}'
composed_model_path = [
path_to_results +
f'/{x}' for x in os.listdir(path_to_results) if x.__contains__('pipeline_saved')]
@@ -102,14 +100,14 @@ def finetune(self):
if os.path.isdir(p):
try:
self.experiment_setup['output_folder'] = PROJECT_PATH + \
self.path_to_save
experiment_setup = deepcopy(self.experiment_setup)
prediction, model = self.finetune_loop(
dataset_name, experiment_setup, p)
metric_result.update({p:
{'metric': Accuracy(model.predict_data.target,
prediction.ravel()).metric(),
'tuned_model': model}})
except ModuleNotFoundError as ex:
print(f'{ex}.OLD VERSION OF PIPELINE. DELETE DIRECTORY')
if len(composed_model_path) != 1:
@@ -130,10 +128,10 @@ def finetune(self):
for _ in metric_result.keys():
if best_metric == 0:
best_metric, best_model, path = metric_result[_][
'metric'], metric_result[_]['tuned_model'], _
elif metric_result[_]['metric'] > best_metric:
best_metric, best_model, path = metric_result[_][
'metric'], metric_result[_]['tuned_model'], _
fedot_results.loc[dataset_name,
'Fedot_Industrial_finetuned'] = best_metric
best_model.output_folder = f'{_}_tuned'
@@ -147,15 +145,14 @@ def finetune(self):

def load_local_basic_results(self, path: str = None):
if path is None:
path = PROJECT_PATH + self.path_to_result
path = os.path.join(self.output_dir, self.path_to_result)
try:
results = pd.read_csv(path, sep=',', index_col=0)
results = results.dropna(axis=1, how='all')
results = results.dropna(axis=0, how='all')
# results = results.fillna()
# results = results.dropna(axis=1, how='all')
# results = results.dropna(axis=0, how='all')
except Exception:
results = self.load_web_results()
self.experiment_setup['output_folder'] = PROJECT_PATH + \
self.path_to_save
return results
else:
return self.results_picker.run(get_metrics_df=True, add_info=True)
@@ -165,14 +162,14 @@ def create_report(self):
names = []
for dataset_name in self.custom_datasets:
model_result_path = PROJECT_PATH + self.path_to_save + \
f'/{dataset_name}' + '/metrics_report.csv'
if os.path.isfile(model_result_path):
df = pd.read_csv(model_result_path, index_col=0, sep=',')
df = df.fillna(0)
if 'Fedot_Industrial_finetuned' not in df.columns:
df['Fedot_Industrial_finetuned'] = 0
metrics = df.loc[dataset_name,
'Fedot_Industrial':'Fedot_Industrial_finetuned']
_.append(metrics.T.values)
names.append(dataset_name)
stacked_resutls = np.stack(_, axis=1).T
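For orientation, here is a minimal sketch of driving the reworked BenchmarkTSC in its new model-versus-model mode. It mirrors the PDL examples added later in this commit (examples/benchmark_example/classification/PDL_uni.py and PDL_multi.py), except that the machine-specific Windows output_folder is swapped for an assumed relative path and finetune is left off, so treat it as a sketch rather than a drop-in script. A dict-valued 'initial_assumption' is what routes run() into _run_model_versus_model, while a plain list keeps the _run_industrial_versus_sota path.

from benchmark.benchmark_TSC import BenchmarkTSC

# compare the new PDL classifier pipeline against a random-forest baseline
comparasion_dict = dict(pairwise_approach=['quantile_extractor', 'pdl_clf'],
                        baseline=['quantile_extractor', 'rf'])

experiment_setup = {'problem': 'classification',
                    'metric': 'accuracy',              # must appear in 'metric_names' so _run_model_versus_model can look it up
                    'metric_names': ('f1', 'accuracy'),
                    'timeout': 2.0,
                    'n_jobs': -1,
                    'output_folder': './benchmark/results/',   # assumed relative path
                    'initial_assumption': comparasion_dict,    # dict -> model-versus-model branch
                    'finetune': False}                         # the commit's examples use True

if __name__ == "__main__":
    benchmark = BenchmarkTSC(experiment_setup=experiment_setup,
                             use_small_datasets=True)  # True: univariate suite, False: multivariate suite
    benchmark.run()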
67 changes: 43 additions & 24 deletions benchmark/benchmark_TSER.py
@@ -1,19 +1,20 @@
from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH
from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
from benchmark.abstract_bench import AbstractBenchmark
from fedot_ind.core.metrics.metrics_implementation import RMSE
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.api.main import FedotIndustrial
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.node import PipelineNode
import pandas as pd
import logging
import os
from abc import ABC
from copy import deepcopy

import matplotlib
import pandas as pd
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

from benchmark.abstract_bench import AbstractBenchmark
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
from fedot_ind.core.metrics.metrics_implementation import RMSE
from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH
from fedot_ind.tools.loader import DataLoader

matplotlib.use('TkAgg')
@@ -31,6 +32,7 @@ def __init__(self,
self.logger = logging.getLogger(self.__class__.__name__)

self.experiment_setup = experiment_setup
self.init_assumption = deepcopy(self.experiment_setup['initial_assumption'])
self.monash_regression = MULTI_REG_BENCH
if custom_datasets is None:
self.custom_datasets = self.monash_regression
@@ -40,26 +42,43 @@ def __init__(self,
self.results_picker = ResultsPicker(
path=os.path.abspath(self.output_dir))

def _run_model_versus_model(self, dataset_name, comparasion_dict):
approach_dict = {}
for approach in comparasion_dict.keys():
result_dict = ApiTemplate(api_config=self.experiment_setup,
metric_list=self.experiment_setup['metric_names']). \
eval(dataset=dataset_name,
initial_assumption=comparasion_dict[approach],
finetune=self.experiment_setup['finetune'])
metric = result_dict['metrics'][self.experiment_setup['metric']][0]
approach_dict.update({approach: metric})
return approach_dict

def _run_industrial_versus_sota(self, dataset_name):
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.evaluate_loop(dataset_name, experiment_setup)
return RMSE(target, prediction).metric()

def run(self):
self.logger.info('Benchmark test started')
basic_results = self.load_local_basic_results()
metric_dict = {}
for dataset_name in self.custom_datasets:
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.evaluate_loop(
dataset_name, experiment_setup)
metric = RMSE(target, prediction).metric()
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
basic_results.to_csv(dataset_path)
basic_path = os.path.join(
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
try:
if isinstance(self.init_assumption, dict):
model_name = list(self.init_assumption.keys())
metric = self._run_model_versus_model(dataset_name, self.init_assumption)
else:
metric = self._run_industrial_versus_sota(dataset_name)
model_name = 'Fedot_Industrial'
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, model_name] = metric
basic_path = os.path.join(self.experiment_setup['output_folder'])
if not os.path.exists(basic_path):
os.makedirs(basic_path)
basic_results.to_csv(os.path.join(basic_path, 'comprasion_metrics_report.csv'))
except Exception:
self.logger.info(f"{dataset_name} problem with eval")
self.logger.info("Benchmark test finished")

def load_local_basic_results(self, path: str = None):
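In both benchmark classes, _run_model_versus_model reports result_dict['metrics'][experiment_setup['metric']][0], so the value of 'metric' has to be one of the requested 'metric_names'; otherwise the lookup fails and the dataset is skipped by the except branch in run(). Below is a small illustration of that selection step, under the assumption that the metrics container maps each metric name to a list of values (the exact object returned by ApiTemplate.eval is not shown in this diff).

# toy container standing in for result_dict['metrics']; the numbers are placeholders, not benchmark output
result_metrics = {'rmse': [3.42], 'mae': [2.18]}
experiment_setup = {'metric': 'rmse', 'metric_names': ('rmse', 'mae')}

# the lookup performed by _run_model_versus_model
metric = result_metrics[experiment_setup['metric']][0]
print(metric)  # 3.42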
@@ -8,7 +8,7 @@
from fedot_ind.api.utils.checkers_collections import DataCheck
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor
from fedot_ind.core.repository.constanst_repository import FEDOT_TASK
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

24 changes: 24 additions & 0 deletions examples/benchmark_example/classification/PDL_multi.py
@@ -0,0 +1,24 @@
from benchmark.benchmark_TSC import BenchmarkTSC

init_assumption_pdl = ['quantile_extractor', 'pdl_clf']
init_assumption_rf = ['quantile_extractor', 'rf']
comparasion_dict = dict(pairwise_approach=init_assumption_pdl,
baseline=init_assumption_rf)
experiment_setup = {
'problem': 'classification',
'metric': 'accuracy',
'timeout': 2.0,
'num_of_generations': 15,
'pop_size': 10,
'metric_names': ('f1', 'accuracy'),
'logging_level': 10,
'n_jobs': -1,
'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/',
'initial_assumption': comparasion_dict,
'finetune': True}

if __name__ == "__main__":
benchmark = BenchmarkTSC(experiment_setup=experiment_setup,
use_small_datasets=False)
benchmark.run()
_ = 1
3 changes: 2 additions & 1 deletion examples/benchmark_example/classification/PDL_uni.py
@@ -7,13 +7,14 @@
experiment_setup = {
'problem': 'classification',
'metric': 'accuracy',
'timeout': 2,
'timeout': 2.0,
'num_of_generations': 15,
'pop_size': 10,
'metric_names': ('f1', 'accuracy'),
'logging_level': 10,
'n_jobs': -1,
'initial_assumption': comparasion_dict,
'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/',
'finetune': True}

if __name__ == "__main__":
45 changes: 45 additions & 0 deletions examples/benchmark_example/regression/PDL_multi.py
@@ -0,0 +1,45 @@
from benchmark.benchmark_TSER import BenchmarkTSER

init_assumption_pdl = ['quantile_extractor', 'pdl_reg']
init_assumption_rf = ['quantile_extractor', 'treg']
comparasion_dict = dict(pairwise_approach=init_assumption_pdl,
baseline=init_assumption_rf)
experiment_setup = {
'problem': 'regression',
'metric': 'rmse',
'timeout': 2.0,
'num_of_generations': 15,
'pop_size': 10,
    'metric_names': ('rmse', 'mae'),  # regression metrics; the 'metric' above must be among them
'logging_level': 10,
'n_jobs': -1,
'initial_assumption': comparasion_dict,
'finetune': True}
custom_dataset = [
# 'ElectricMotorTemperature',
# 'PrecipitationAndalusia',
# 'AcousticContaminationMadrid',
# 'WindTurbinePower',
# 'DailyOilGasPrices',
# 'DailyTemperatureLatitude',
# 'LPGasMonitoringHomeActivity',
# 'AluminiumConcentration',
# 'BoronConcentration',
# 'CopperConcentration',
# # 'IronConcentration',
# 'ManganeseConcentration',
# 'SodiumConcentration',
# 'PhosphorusConcentration',
# 'PotassiumConcentration',
'MagnesiumConcentration',
'SulphurConcentration',
'ZincConcentration',
'CalciumConcentration'
]
custom_dataset = None
if __name__ == "__main__":
benchmark = BenchmarkTSER(experiment_setup=experiment_setup,
custom_datasets=custom_dataset
)
benchmark.run()
_ = 1
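Note that the custom_dataset list above is immediately overridden with None, so BenchmarkTSER falls back to the full MULTI_REG_BENCH suite (the 'if custom_datasets is None' branch in its constructor). A minimal sketch of restricting the run to a couple of datasets from the commented list instead, reusing the experiment_setup dict defined in the example above:

from benchmark.benchmark_TSER import BenchmarkTSER

# any subset of the dataset names listed above can be passed explicitly
custom_dataset = ['MagnesiumConcentration', 'SulphurConcentration']

if __name__ == "__main__":
    benchmark = BenchmarkTSER(experiment_setup=experiment_setup,  # the dict from the example above
                              custom_datasets=custom_dataset)
    benchmark.run()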