Commit
add PDL model, benchmark comparison and example from parma
v1docq committed Nov 28, 2024
1 parent 5855e4a commit 7d6490a
Showing 9 changed files with 231 additions and 74 deletions.
77 changes: 37 additions & 40 deletions benchmark/benchmark_TSC.py
@@ -28,6 +28,7 @@ def __init__(self,
self.logger = logging.getLogger(self.__class__.__name__)

self.experiment_setup = experiment_setup
self.init_assumption = deepcopy(self.experiment_setup['initial_assumption'])
self.multi_TSC = MULTI_CLF_BENCH
self.uni_TSC = UNI_CLF_BENCH
if custom_datasets is None:
@@ -39,13 +40,13 @@ def __init__(self,
self.custom_datasets = custom_datasets

if use_small_datasets:
self.path_to_result = '/benchmark/results/time_series_uni_clf_comparasion.csv'
self.path_to_save = '/benchmark/results/ts_uni_classification'
self.path_to_result = 'time_series_uni_clf_comparasion.csv'
self.path_to_save = 'ts_uni_classification'
else:
self.path_to_result = '/benchmark/results/time_series_multi_clf_comparasion.csv'
self.path_to_save = '/benchmark/results/ts_multi_classification'
self.results_picker = ResultsPicker(
path=os.path.abspath(self.output_dir))
self.path_to_result = 'time_series_multi_clf_comparasion.csv'
self.path_to_save = 'ts_multi_classification'
self.output_dir = os.path.join(self.experiment_setup['output_folder'], self.path_to_save)
self.results_picker = ResultsPicker(path=os.path.abspath(self.output_dir))

def _run_model_versus_model(self, dataset_name, comparasion_dict):
approach_dict = {}
@@ -55,45 +56,42 @@ def _run_model_versus_model(self, dataset_name, comparasion_dict):
eval(dataset=dataset_name,
initial_assumption=comparasion_dict[approach],
finetune=self.experiment_setup['finetune'])
approach_dict.update({approach: result_dict['metrics']})
metric = result_dict['metrics'][self.experiment_setup['metric']][0]
approach_dict.update({approach: metric})
return approach_dict

def _run_industrial_versus_sota(self, dataset_name):
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.evaluate_loop(dataset_name, experiment_setup)
Accuracy(target, prediction).metric()
return Accuracy(target, prediction).metric()

def run(self):
self.logger.info('Benchmark test started')
basic_results = self.load_local_basic_results()
metric_dict = {}
for dataset_name in self.custom_datasets:
if isinstance(self.experiment_setup['initial_assumption'], dict):
metric = self._run_model_versus_model(dataset_name, self.experiment_setup['initial_assumption'])
model_name = list(self.experiment_setup['initial_assumption'].keys())
else:
metric = self._run_industrial_versus_sota()
model_name = 'Fedot_Industrial'
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, model_name] = metric
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
basic_results.to_csv(dataset_path)
gc.collect()
basic_path = os.path.join(
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
try:
if isinstance(self.init_assumption, dict):
model_name = list(self.init_assumption.keys())
metric = self._run_model_versus_model(dataset_name, self.init_assumption)
else:
                    metric = self._run_industrial_versus_sota(dataset_name)
model_name = 'Fedot_Industrial'
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, model_name] = metric
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
basic_results.to_csv(os.path.join(self.output_dir, self.path_to_result))
except Exception:
self.logger.info(f"{dataset_name} problem with eval")
self.logger.info("Benchmark test finished")

def finetune(self):
self.logger.info('Benchmark finetune started')
dataset_result = {}
for dataset_name in self.custom_datasets:
path_to_results = PROJECT_PATH + \
self.path_to_save + f'/{dataset_name}'
composed_model_path = [
path_to_results +
f'/{x}' for x in os.listdir(path_to_results) if x.__contains__('pipeline_saved')]
@@ -102,14 +100,14 @@ def finetune(self):
if os.path.isdir(p):
try:
self.experiment_setup['output_folder'] = PROJECT_PATH + \
self.path_to_save
experiment_setup = deepcopy(self.experiment_setup)
prediction, model = self.finetune_loop(
dataset_name, experiment_setup, p)
metric_result.update({p:
{'metric': Accuracy(model.predict_data.target,
prediction.ravel()).metric(),
'tuned_model': model}})
except ModuleNotFoundError as ex:
print(f'{ex}.OLD VERSION OF PIPELINE. DELETE DIRECTORY')
if len(composed_model_path) != 1:
@@ -130,10 +128,10 @@ def finetune(self):
for _ in metric_result.keys():
if best_metric == 0:
best_metric, best_model, path = metric_result[_][
'metric'], metric_result[_]['tuned_model'], _
elif metric_result[_]['metric'] > best_metric:
best_metric, best_model, path = metric_result[_][
'metric'], metric_result[_]['tuned_model'], _
fedot_results.loc[dataset_name,
'Fedot_Industrial_finetuned'] = best_metric
best_model.output_folder = f'{_}_tuned'
@@ -147,15 +145,14 @@ def finetune(self):

def load_local_basic_results(self, path: str = None):
if path is None:
path = PROJECT_PATH + self.path_to_result
path = os.path.join(self.output_dir, self.path_to_result)
try:
results = pd.read_csv(path, sep=',', index_col=0)
results = results.dropna(axis=1, how='all')
results = results.dropna(axis=0, how='all')
# results = results.fillna()
# results = results.dropna(axis=1, how='all')
# results = results.dropna(axis=0, how='all')
except Exception:
results = self.load_web_results()
self.experiment_setup['output_folder'] = PROJECT_PATH + \
self.path_to_save
return results
else:
return self.results_picker.run(get_metrics_df=True, add_info=True)
@@ -165,14 +162,14 @@ def create_report(self):
names = []
for dataset_name in self.custom_datasets:
model_result_path = PROJECT_PATH + self.path_to_save + \
f'/{dataset_name}' + '/metrics_report.csv'
if os.path.isfile(model_result_path):
df = pd.read_csv(model_result_path, index_col=0, sep=',')
df = df.fillna(0)
if 'Fedot_Industrial_finetuned' not in df.columns:
df['Fedot_Industrial_finetuned'] = 0
metrics = df.loc[dataset_name,
'Fedot_Industrial':'Fedot_Industrial_finetuned']
_.append(metrics.T.values)
names.append(dataset_name)
stacked_resutls = np.stack(_, axis=1).T
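For orientation, here is a minimal sketch of driving the reworked BenchmarkTSC in its new model-versus-model mode. It mirrors the PDL examples added later in this commit (examples/benchmark_example/classification/PDL_uni.py and PDL_multi.py), except that the machine-specific Windows output_folder is swapped for an assumed relative path and finetune is left off, so treat it as a sketch rather than a drop-in script. A dict-valued 'initial_assumption' is what routes run() into _run_model_versus_model, while a plain list keeps the _run_industrial_versus_sota path.

from benchmark.benchmark_TSC import BenchmarkTSC

# compare the new PDL classifier pipeline against a random-forest baseline
comparasion_dict = dict(pairwise_approach=['quantile_extractor', 'pdl_clf'],
                        baseline=['quantile_extractor', 'rf'])

experiment_setup = {'problem': 'classification',
                    'metric': 'accuracy',              # must appear in 'metric_names' so _run_model_versus_model can look it up
                    'metric_names': ('f1', 'accuracy'),
                    'timeout': 2.0,
                    'n_jobs': -1,
                    'output_folder': './benchmark/results/',   # assumed relative path
                    'initial_assumption': comparasion_dict,    # dict -> model-versus-model branch
                    'finetune': False}                         # the commit's examples use True

if __name__ == "__main__":
    benchmark = BenchmarkTSC(experiment_setup=experiment_setup,
                             use_small_datasets=True)  # True: univariate suite, False: multivariate suite
    benchmark.run()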
67 changes: 43 additions & 24 deletions benchmark/benchmark_TSER.py
@@ -1,19 +1,20 @@
from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH
from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
from benchmark.abstract_bench import AbstractBenchmark
from fedot_ind.core.metrics.metrics_implementation import RMSE
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.api.main import FedotIndustrial
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.node import PipelineNode
import pandas as pd
import logging
import os
from abc import ABC
from copy import deepcopy

import matplotlib
import pandas as pd
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline

from benchmark.abstract_bench import AbstractBenchmark
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
from fedot_ind.core.metrics.metrics_implementation import RMSE
from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH
from fedot_ind.tools.loader import DataLoader

matplotlib.use('TkAgg')
@@ -31,6 +32,7 @@ def __init__(self,
self.logger = logging.getLogger(self.__class__.__name__)

self.experiment_setup = experiment_setup
self.init_assumption = deepcopy(self.experiment_setup['initial_assumption'])
self.monash_regression = MULTI_REG_BENCH
if custom_datasets is None:
self.custom_datasets = self.monash_regression
@@ -40,26 +42,43 @@ def __init__(self,
self.results_picker = ResultsPicker(
path=os.path.abspath(self.output_dir))

def _run_model_versus_model(self, dataset_name, comparasion_dict):
approach_dict = {}
for approach in comparasion_dict.keys():
result_dict = ApiTemplate(api_config=self.experiment_setup,
metric_list=self.experiment_setup['metric_names']). \
eval(dataset=dataset_name,
initial_assumption=comparasion_dict[approach],
finetune=self.experiment_setup['finetune'])
metric = result_dict['metrics'][self.experiment_setup['metric']][0]
approach_dict.update({approach: metric})
return approach_dict

def _run_industrial_versus_sota(self, dataset_name):
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.evaluate_loop(dataset_name, experiment_setup)
return RMSE(target, prediction).metric()

def run(self):
self.logger.info('Benchmark test started')
basic_results = self.load_local_basic_results()
metric_dict = {}
for dataset_name in self.custom_datasets:
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.evaluate_loop(
dataset_name, experiment_setup)
metric = RMSE(target, prediction).metric()
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
dataset_path = os.path.join(
self.experiment_setup['output_folder'],
f'{dataset_name}',
'metrics_report.csv')
basic_results.to_csv(dataset_path)
basic_path = os.path.join(
self.experiment_setup['output_folder'],
'comprasion_metrics_report.csv')
basic_results.to_csv(basic_path)
try:
if isinstance(self.init_assumption, dict):
model_name = list(self.init_assumption.keys())
metric = self._run_model_versus_model(dataset_name, self.init_assumption)
else:
metric = self._run_industrial_versus_sota(dataset_name)
model_name = 'Fedot_Industrial'
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, model_name] = metric
basic_path = os.path.join(self.experiment_setup['output_folder'])
if not os.path.exists(basic_path):
os.makedirs(basic_path)
basic_results.to_csv(os.path.join(basic_path, 'comprasion_metrics_report.csv'))
except Exception:
self.logger.info(f"{dataset_name} problem with eval")
self.logger.info("Benchmark test finished")

def load_local_basic_results(self, path: str = None):
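In both benchmark classes, _run_model_versus_model reports result_dict['metrics'][experiment_setup['metric']][0], so the value of 'metric' has to be one of the requested 'metric_names'; otherwise the lookup fails and the dataset is skipped by the except branch in run(). Below is a small illustration of that selection step, under the assumption that the metrics container maps each metric name to a list of values (the exact object returned by ApiTemplate.eval is not shown in this diff).

# toy container standing in for result_dict['metrics']; the numbers are placeholders, not benchmark output
result_metrics = {'rmse': [3.42], 'mae': [2.18]}
experiment_setup = {'metric': 'rmse', 'metric_names': ('rmse', 'mae')}

# the lookup performed by _run_model_versus_model
metric = result_metrics[experiment_setup['metric']][0]
print(metric)  # 3.42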
@@ -8,7 +8,7 @@
from fedot_ind.api.utils.checkers_collections import DataCheck
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor
from fedot_ind.core.repository.constanst_repository import FEDOT_TASK
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

24 changes: 24 additions & 0 deletions examples/benchmark_example/classification/PDL_multi.py
@@ -0,0 +1,24 @@
from benchmark.benchmark_TSC import BenchmarkTSC

init_assumption_pdl = ['quantile_extractor', 'pdl_clf']
init_assumption_rf = ['quantile_extractor', 'rf']
comparasion_dict = dict(pairwise_approach=init_assumption_pdl,
baseline=init_assumption_rf)
experiment_setup = {
'problem': 'classification',
'metric': 'accuracy',
'timeout': 2.0,
'num_of_generations': 15,
'pop_size': 10,
'metric_names': ('f1', 'accuracy'),
'logging_level': 10,
'n_jobs': -1,
'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/',
'initial_assumption': comparasion_dict,
'finetune': True}

if __name__ == "__main__":
benchmark = BenchmarkTSC(experiment_setup=experiment_setup,
use_small_datasets=False)
benchmark.run()
_ = 1
3 changes: 2 additions & 1 deletion examples/benchmark_example/classification/PDL_uni.py
@@ -7,13 +7,14 @@
experiment_setup = {
'problem': 'classification',
'metric': 'accuracy',
'timeout': 2,
'timeout': 2.0,
'num_of_generations': 15,
'pop_size': 10,
'metric_names': ('f1', 'accuracy'),
'logging_level': 10,
'n_jobs': -1,
'initial_assumption': comparasion_dict,
'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/',
'finetune': True}

if __name__ == "__main__":
45 changes: 45 additions & 0 deletions examples/benchmark_example/regression/PDL_multi.py
@@ -0,0 +1,45 @@
from benchmark.benchmark_TSER import BenchmarkTSER

init_assumption_pdl = ['quantile_extractor', 'pdl_reg']
init_assumption_rf = ['quantile_extractor', 'treg']
comparasion_dict = dict(pairwise_approach=init_assumption_pdl,
baseline=init_assumption_rf)
experiment_setup = {
'problem': 'regression',
'metric': 'rmse',
'timeout': 2.0,
'num_of_generations': 15,
'pop_size': 10,
    'metric_names': ('rmse', 'mae'),  # regression metrics; the 'metric' above must be among them
'logging_level': 10,
'n_jobs': -1,
'initial_assumption': comparasion_dict,
'finetune': True}
custom_dataset = [
# 'ElectricMotorTemperature',
# 'PrecipitationAndalusia',
# 'AcousticContaminationMadrid',
# 'WindTurbinePower',
# 'DailyOilGasPrices',
# 'DailyTemperatureLatitude',
# 'LPGasMonitoringHomeActivity',
# 'AluminiumConcentration',
# 'BoronConcentration',
# 'CopperConcentration',
# # 'IronConcentration',
# 'ManganeseConcentration',
# 'SodiumConcentration',
# 'PhosphorusConcentration',
# 'PotassiumConcentration',
'MagnesiumConcentration',
'SulphurConcentration',
'ZincConcentration',
'CalciumConcentration'
]
custom_dataset = None
if __name__ == "__main__":
benchmark = BenchmarkTSER(experiment_setup=experiment_setup,
custom_datasets=custom_dataset
)
benchmark.run()
_ = 1
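Note that the custom_dataset list above is immediately overridden with None, so BenchmarkTSER falls back to the full MULTI_REG_BENCH suite (the 'if custom_datasets is None' branch in its constructor). A minimal sketch of restricting the run to a couple of datasets from the commented list instead, reusing the experiment_setup dict defined in the example above:

from benchmark.benchmark_TSER import BenchmarkTSER

# any subset of the dataset names listed above can be passed explicitly
custom_dataset = ['MagnesiumConcentration', 'SulphurConcentration']

if __name__ == "__main__":
    benchmark = BenchmarkTSER(experiment_setup=experiment_setup,  # the dict from the example above
                              custom_datasets=custom_dataset)
    benchmark.run()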