diff --git a/benchmark/benchmark_TSC.py b/benchmark/benchmark_TSC.py
index 22bb1c1f8..219b42aae 100644
--- a/benchmark/benchmark_TSC.py
+++ b/benchmark/benchmark_TSC.py
@@ -28,6 +28,7 @@ def __init__(self,
         self.logger = logging.getLogger(self.__class__.__name__)

         self.experiment_setup = experiment_setup
+        self.init_assumption = deepcopy(self.experiment_setup['initial_assumption'])
         self.multi_TSC = MULTI_CLF_BENCH
         self.uni_TSC = UNI_CLF_BENCH
         if custom_datasets is None:
@@ -39,13 +40,13 @@ def __init__(self,
             self.custom_datasets = custom_datasets

         if use_small_datasets:
-            self.path_to_result = '/benchmark/results/time_series_uni_clf_comparasion.csv'
-            self.path_to_save = '/benchmark/results/ts_uni_classification'
+            self.path_to_result = 'time_series_uni_clf_comparasion.csv'
+            self.path_to_save = 'ts_uni_classification'
         else:
-            self.path_to_result = '/benchmark/results/time_series_multi_clf_comparasion.csv'
-            self.path_to_save = '/benchmark/results/ts_multi_classification'
-        self.results_picker = ResultsPicker(
-            path=os.path.abspath(self.output_dir))
+            self.path_to_result = 'time_series_multi_clf_comparasion.csv'
+            self.path_to_save = 'ts_multi_classification'
+        self.output_dir = os.path.join(self.experiment_setup['output_folder'], self.path_to_save)
+        self.results_picker = ResultsPicker(path=os.path.abspath(self.output_dir))

     def _run_model_versus_model(self, dataset_name, comparasion_dict):
         approach_dict = {}
@@ -55,37 +56,34 @@ def _run_model_versus_model(self, dataset_name, comparasion_dict):
                 eval(dataset=dataset_name,
                      initial_assumption=comparasion_dict[approach],
                      finetune=self.experiment_setup['finetune'])
-            approach_dict.update({approach: result_dict['metrics']})
+            metric = result_dict['metrics'][self.experiment_setup['metric']][0]
+            approach_dict.update({approach: metric})
         return approach_dict

     def _run_industrial_versus_sota(self, dataset_name):
         experiment_setup = deepcopy(self.experiment_setup)
         prediction, target = self.evaluate_loop(dataset_name, experiment_setup)
-        Accuracy(target, prediction).metric()
+        return Accuracy(target, prediction).metric()

     def run(self):
         self.logger.info('Benchmark test started')
         basic_results = self.load_local_basic_results()
         metric_dict = {}
         for dataset_name in self.custom_datasets:
-            if isinstance(self.experiment_setup['initial_assumption'], dict):
-                metric = self._run_model_versus_model(dataset_name, self.experiment_setup['initial_assumption'])
-                model_name = list(self.experiment_setup['initial_assumption'].keys())
-            else:
-                metric = self._run_industrial_versus_sota()
-                model_name = 'Fedot_Industrial'
-            metric_dict.update({dataset_name: metric})
-            basic_results.loc[dataset_name, model_name] = metric
-            dataset_path = os.path.join(
-                self.experiment_setup['output_folder'],
-                f'{dataset_name}',
-                'metrics_report.csv')
-            basic_results.to_csv(dataset_path)
-            gc.collect()
-        basic_path = os.path.join(
-            self.experiment_setup['output_folder'],
-            'comprasion_metrics_report.csv')
-        basic_results.to_csv(basic_path)
+            try:
+                if isinstance(self.init_assumption, dict):
+                    model_name = list(self.init_assumption.keys())
+                    metric = self._run_model_versus_model(dataset_name, self.init_assumption)
+                else:
+                    metric = self._run_industrial_versus_sota(dataset_name)
+                    model_name = 'Fedot_Industrial'
+                metric_dict.update({dataset_name: metric})
+                basic_results.loc[dataset_name, model_name] = metric
+                if not os.path.exists(self.output_dir):
+                    os.makedirs(self.output_dir)
+                basic_results.to_csv(os.path.join(self.output_dir, self.path_to_result))
+            except Exception:
self.logger.info(f"{dataset_name} problem with eval") self.logger.info("Benchmark test finished") def finetune(self): @@ -93,7 +91,7 @@ def finetune(self): dataset_result = {} for dataset_name in self.custom_datasets: path_to_results = PROJECT_PATH + \ - self.path_to_save + f'/{dataset_name}' + self.path_to_save + f'/{dataset_name}' composed_model_path = [ path_to_results + f'/{x}' for x in os.listdir(path_to_results) if x.__contains__('pipeline_saved')] @@ -102,14 +100,14 @@ def finetune(self): if os.path.isdir(p): try: self.experiment_setup['output_folder'] = PROJECT_PATH + \ - self.path_to_save + self.path_to_save experiment_setup = deepcopy(self.experiment_setup) prediction, model = self.finetune_loop( dataset_name, experiment_setup, p) metric_result.update({p: - {'metric': Accuracy(model.predict_data.target, - prediction.ravel()).metric(), - 'tuned_model': model}}) + {'metric': Accuracy(model.predict_data.target, + prediction.ravel()).metric(), + 'tuned_model': model}}) except ModuleNotFoundError as ex: print(f'{ex}.OLD VERSION OF PIPELINE. DELETE DIRECTORY') if len(composed_model_path) != 1: @@ -130,10 +128,10 @@ def finetune(self): for _ in metric_result.keys(): if best_metric == 0: best_metric, best_model, path = metric_result[_][ - 'metric'], metric_result[_]['tuned_model'], _ + 'metric'], metric_result[_]['tuned_model'], _ elif metric_result[_]['metric'] > best_metric: best_metric, best_model, path = metric_result[_][ - 'metric'], metric_result[_]['tuned_model'], _ + 'metric'], metric_result[_]['tuned_model'], _ fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = best_metric best_model.output_folder = f'{_}_tuned' @@ -147,15 +145,14 @@ def finetune(self): def load_local_basic_results(self, path: str = None): if path is None: - path = PROJECT_PATH + self.path_to_result + path = os.path.join(self.output_dir, self.path_to_result) try: results = pd.read_csv(path, sep=',', index_col=0) - results = results.dropna(axis=1, how='all') - results = results.dropna(axis=0, how='all') + # results = results.fillna() + # results = results.dropna(axis=1, how='all') + # results = results.dropna(axis=0, how='all') except Exception: results = self.load_web_results() - self.experiment_setup['output_folder'] = PROJECT_PATH + \ - self.path_to_save return results else: return self.results_picker.run(get_metrics_df=True, add_info=True) @@ -165,14 +162,14 @@ def create_report(self): names = [] for dataset_name in self.custom_datasets: model_result_path = PROJECT_PATH + self.path_to_save + \ - f'/{dataset_name}' + '/metrics_report.csv' + f'/{dataset_name}' + '/metrics_report.csv' if os.path.isfile(model_result_path): df = pd.read_csv(model_result_path, index_col=0, sep=',') df = df.fillna(0) if 'Fedot_Industrial_finetuned' not in df.columns: df['Fedot_Industrial_finetuned'] = 0 metrics = df.loc[dataset_name, - 'Fedot_Industrial':'Fedot_Industrial_finetuned'] + 'Fedot_Industrial':'Fedot_Industrial_finetuned'] _.append(metrics.T.values) names.append(dataset_name) stacked_resutls = np.stack(_, axis=1).T diff --git a/benchmark/benchmark_TSER.py b/benchmark/benchmark_TSER.py index 61e985025..b5b7c170e 100644 --- a/benchmark/benchmark_TSER.py +++ b/benchmark/benchmark_TSER.py @@ -1,19 +1,20 @@ -from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH -from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker -from benchmark.abstract_bench import AbstractBenchmark -from fedot_ind.core.metrics.metrics_implementation import RMSE -from 
diff --git a/benchmark/benchmark_TSER.py b/benchmark/benchmark_TSER.py
index 61e985025..b5b7c170e 100644
--- a/benchmark/benchmark_TSER.py
+++ b/benchmark/benchmark_TSER.py
@@ -1,19 +1,20 @@
-from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH
-from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
-from benchmark.abstract_bench import AbstractBenchmark
-from fedot_ind.core.metrics.metrics_implementation import RMSE
-from fedot_ind.api.utils.path_lib import PROJECT_PATH
-from fedot_ind.api.main import FedotIndustrial
-from fedot.core.pipelines.pipeline import Pipeline
-from fedot.core.pipelines.node import PipelineNode
-import pandas as pd
 import logging
 import os
 from abc import ABC
 from copy import deepcopy

 import matplotlib
+import pandas as pd
+from fedot.core.pipelines.node import PipelineNode
+from fedot.core.pipelines.pipeline import Pipeline
+
+from benchmark.abstract_bench import AbstractBenchmark
+from fedot_ind.api.main import FedotIndustrial
+from fedot_ind.api.utils.path_lib import PROJECT_PATH
+from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
+from fedot_ind.core.metrics.metrics_implementation import RMSE
+from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH
 from fedot_ind.tools.loader import DataLoader

 matplotlib.use('TkAgg')
@@ -31,6 +32,7 @@ def __init__(self,
         self.logger = logging.getLogger(self.__class__.__name__)

         self.experiment_setup = experiment_setup
+        self.init_assumption = deepcopy(self.experiment_setup['initial_assumption'])
         self.monash_regression = MULTI_REG_BENCH
         if custom_datasets is None:
             self.custom_datasets = self.monash_regression
@@ -40,26 +42,43 @@ def __init__(self,
         self.results_picker = ResultsPicker(
             path=os.path.abspath(self.output_dir))

+    def _run_model_versus_model(self, dataset_name, comparasion_dict):
+        approach_dict = {}
+        for approach in comparasion_dict.keys():
+            result_dict = ApiTemplate(api_config=self.experiment_setup,
+                                      metric_list=self.experiment_setup['metric_names']). \
+                eval(dataset=dataset_name,
+                     initial_assumption=comparasion_dict[approach],
+                     finetune=self.experiment_setup['finetune'])
+            metric = result_dict['metrics'][self.experiment_setup['metric']][0]
+            approach_dict.update({approach: metric})
+        return approach_dict
+
+    def _run_industrial_versus_sota(self, dataset_name):
+        experiment_setup = deepcopy(self.experiment_setup)
+        prediction, target = self.evaluate_loop(dataset_name, experiment_setup)
+        return RMSE(target, prediction).metric()
+
     def run(self):
         self.logger.info('Benchmark test started')
         basic_results = self.load_local_basic_results()
         metric_dict = {}
         for dataset_name in self.custom_datasets:
-            experiment_setup = deepcopy(self.experiment_setup)
-            prediction, target = self.evaluate_loop(
-                dataset_name, experiment_setup)
-            metric = RMSE(target, prediction).metric()
-            metric_dict.update({dataset_name: metric})
-            basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
-            dataset_path = os.path.join(
-                self.experiment_setup['output_folder'],
-                f'{dataset_name}',
-                'metrics_report.csv')
-            basic_results.to_csv(dataset_path)
-        basic_path = os.path.join(
-            self.experiment_setup['output_folder'],
-            'comprasion_metrics_report.csv')
-        basic_results.to_csv(basic_path)
+            try:
+                if isinstance(self.init_assumption, dict):
+                    model_name = list(self.init_assumption.keys())
+                    metric = self._run_model_versus_model(dataset_name, self.init_assumption)
+                else:
+                    metric = self._run_industrial_versus_sota(dataset_name)
+                    model_name = 'Fedot_Industrial'
+                metric_dict.update({dataset_name: metric})
+                basic_results.loc[dataset_name, model_name] = metric
+                basic_path = os.path.join(self.experiment_setup['output_folder'])
+                if not os.path.exists(basic_path):
+                    os.makedirs(basic_path)
+                basic_results.to_csv(os.path.join(basic_path, 'comprasion_metrics_report.csv'))
+            except Exception:
+                self.logger.info(f"{dataset_name} problem with eval")
with eval") self.logger.info("Benchmark test finished") def load_local_basic_results(self, path: str = None): diff --git a/examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py b/examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py index 21e83d5af..caef4407f 100644 --- a/examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py +++ b/examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py @@ -8,7 +8,7 @@ from fedot_ind.api.utils.checkers_collections import DataCheck from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate -from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor +from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor from fedot_ind.core.repository.constanst_repository import FEDOT_TASK from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels diff --git a/examples/benchmark_example/classification/PDL_multi.py b/examples/benchmark_example/classification/PDL_multi.py new file mode 100644 index 000000000..ca8b628fc --- /dev/null +++ b/examples/benchmark_example/classification/PDL_multi.py @@ -0,0 +1,24 @@ +from benchmark.benchmark_TSC import BenchmarkTSC + +init_assumption_pdl = ['quantile_extractor', 'pdl_clf'] +init_assumption_rf = ['quantile_extractor', 'rf'] +comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) +experiment_setup = { + 'problem': 'classification', + 'metric': 'accuracy', + 'timeout': 2.0, + 'num_of_generations': 15, + 'pop_size': 10, + 'metric_names': ('f1', 'accuracy'), + 'logging_level': 10, + 'n_jobs': -1, + 'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/', + 'initial_assumption': comparasion_dict, + 'finetune': True} + +if __name__ == "__main__": + benchmark = BenchmarkTSC(experiment_setup=experiment_setup, + use_small_datasets=False) + benchmark.run() + _ = 1 diff --git a/examples/benchmark_example/classification/PDL_uni.py b/examples/benchmark_example/classification/PDL_uni.py index 813a3b8ff..d9c81ddf8 100644 --- a/examples/benchmark_example/classification/PDL_uni.py +++ b/examples/benchmark_example/classification/PDL_uni.py @@ -7,13 +7,14 @@ experiment_setup = { 'problem': 'classification', 'metric': 'accuracy', - 'timeout': 2, + 'timeout': 2.0, 'num_of_generations': 15, 'pop_size': 10, 'metric_names': ('f1', 'accuracy'), 'logging_level': 10, 'n_jobs': -1, 'initial_assumption': comparasion_dict, + 'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/', 'finetune': True} if __name__ == "__main__": diff --git a/examples/benchmark_example/regression/PDL_multi.py b/examples/benchmark_example/regression/PDL_multi.py new file mode 100644 index 000000000..82d3218e8 --- /dev/null +++ b/examples/benchmark_example/regression/PDL_multi.py @@ -0,0 +1,45 @@ +from benchmark.benchmark_TSER import BenchmarkTSER + +init_assumption_pdl = ['quantile_extractor', 'pdl_reg'] +init_assumption_rf = ['quantile_extractor', 'treg'] +comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) +experiment_setup = { + 'problem': 'regression', + 'metric': 'rmse', + 'timeout': 2.0, + 'num_of_generations': 15, + 'pop_size': 10, + 'metric_names': ('f1', 'accuracy'), + 'logging_level': 10, + 'n_jobs': -1, + 
diff --git a/examples/benchmark_example/regression/PDL_multi.py b/examples/benchmark_example/regression/PDL_multi.py
new file mode 100644
index 000000000..82d3218e8
--- /dev/null
+++ b/examples/benchmark_example/regression/PDL_multi.py
@@ -0,0 +1,45 @@
+from benchmark.benchmark_TSER import BenchmarkTSER
+
+init_assumption_pdl = ['quantile_extractor', 'pdl_reg']
+init_assumption_rf = ['quantile_extractor', 'treg']
+comparasion_dict = dict(pairwise_approach=init_assumption_pdl,
+                        baseline=init_assumption_rf)
+experiment_setup = {
+    'problem': 'regression',
+    'metric': 'rmse',
+    'timeout': 2.0,
+    'num_of_generations': 15,
+    'pop_size': 10,
+    'metric_names': ('f1', 'accuracy'),
+    'logging_level': 10,
+    'n_jobs': -1,
+    'initial_assumption': comparasion_dict,
+    'finetune': True}
+custom_dataset = [
+    # 'ElectricMotorTemperature',
+    # 'PrecipitationAndalusia',
+    # 'AcousticContaminationMadrid',
+    # 'WindTurbinePower',
+    # 'DailyOilGasPrices',
+    # 'DailyTemperatureLatitude',
+    # 'LPGasMonitoringHomeActivity',
+    # 'AluminiumConcentration',
+    # 'BoronConcentration',
+    # 'CopperConcentration',
+    # # 'IronConcentration',
+    # 'ManganeseConcentration',
+    # 'SodiumConcentration',
+    # 'PhosphorusConcentration',
+    # 'PotassiumConcentration',
+    'MagnesiumConcentration',
+    'SulphurConcentration',
+    'ZincConcentration',
+    'CalciumConcentration'
+]
+custom_dataset = None
+if __name__ == "__main__":
+    benchmark = BenchmarkTSER(experiment_setup=experiment_setup,
+                              custom_datasets=custom_dataset
+                              )
+    benchmark.run()
+    _ = 1
diff --git a/examples/real_world_examples/industrial_examples/equipment_monitoring/parma_example.py b/examples/real_world_examples/industrial_examples/equipment_monitoring/parma_example.py
new file mode 100644
index 000000000..0c4eee12b
--- /dev/null
+++ b/examples/real_world_examples/industrial_examples/equipment_monitoring/parma_example.py
@@ -0,0 +1,58 @@
+import gc
+
+import numpy as np
+
+from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.operation.transformation.data.park_transformation import park_transform
+
+gc.collect()
+metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
+take_only_inst_phase = False
+sampling_window = 4000
+if take_only_inst_phase:
+    take_only_inst_phase = 4
+else:
+    take_only_inst_phase = 1
+stat_params = {'window_size': 10, 'stride': 1, 'add_global_features': True, 'use_sliding_window': False}
+fourier_params = {'low_rank': 5, 'output_format': 'signal', 'approximation': 'smooth', 'threshold': 0.9}
+wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7'}
+park_params = {}
+feature_generator = {
+    # 'fourier+stat': [('fourier_basis', fourier_params), ('quantile_extractor', stat_params)],
+    'wavelet+stat': [('wavelet_basis', wavelet_params), ('quantile_extractor', stat_params)],
+    'stat_generator': [('quantile_extractor', stat_params)]}
+
+if __name__ == "__main__":
+    finetune = False
+
+    train_features, train_target = park_transform(np.load('./X_train.npy').swapaxes(1, 2))[:, take_only_inst_phase:,
+                                                                                            :sampling_window] \
+        , np.load('./y_train.npy')
+    test_features, test_target = park_transform(np.load('./X_test.npy').swapaxes(1, 2))[:, take_only_inst_phase:,
+                                                                                         :sampling_window] \
+        , np.load('./y_test.npy')
+    input_train = (train_features, train_target)
+    input_test = (test_features, test_target)
+
+    dataset = dict(test_data=input_test, train_data=input_train)
+
+    api_config = dict(problem='classification',
+                      metric='f1',
+                      timeout=120,
+                      pop_size=5,
+                      early_stopping_iterations=20,
+                      early_stopping_timeout=100,
+                      with_tunig=False,
+                      preset='classification_tabular',
+                      industrial_strategy_params={'feature_generator': feature_generator,
+                                                  'data_type': 'tensor',
+                                                  'learning_strategy': 'ts2tabular',
+                                                  },
+                      n_jobs=2,
+                      logging_level=20)
+
+    result_dict = ApiTemplate(api_config=api_config,
+                              metric_list=metric_names).eval(dataset=dataset,
+                                                             finetune=finetune)
+    metrics = result_dict['metrics']
+    _ = 1
diff --git a/fedot_ind/core/models/pdl/pairwise_model.py b/fedot_ind/core/models/pdl/pairwise_model.py
index caf8ad0e7..a975287c6 100644
--- a/fedot_ind/core/models/pdl/pairwise_model.py
+++ b/fedot_ind/core/models/pdl/pairwise_model.py
@@ -156,8 +156,12 @@ def correct_sample_weight(sample_weight: pd.Series, y_train: pd.Series) -> pd.Se
         return sample_weight

     @staticmethod
-    def predict(y_prob: np.ndarray, output_mode: str = 'default'):
-        predicted_classes = np.argmax(y_prob, axis=1)[..., np.newaxis] if output_mode.__contains__('label') else y_prob
+    def predict(y_prob: np.ndarray, output_mode: str = 'default', min_label_zero: bool = True):
+        if output_mode.__contains__('label'):
+            predicted_classes = np.argmax(y_prob, axis=1)[..., np.newaxis]
+            predicted_classes = predicted_classes if min_label_zero else predicted_classes + 1
+        else:
+            predicted_classes = y_prob
         return predicted_classes

@@ -176,6 +180,12 @@ def __init__(self, params: Optional[OperationParameters] = None):
         self.proba_aggregate_method = 'norm'
         self.sample_weight_ = None

+    def _check_target(self):
+        if self.target.min() != 0:
+            self.target_start_zero = False
+        else:
+            self.target_start_zero = True
+
     def _estimate_prior(self):
         if self.prior is not None:
             return self
@@ -195,8 +205,10 @@ def fit(self,
         self.classes_ = sklearn.utils.multiclass.unique_labels(input_data.target)
         self.train_features = input_data.features  # Store the classes seen during fit
         self._estimate_prior()
+        self._check_target()
         X_pair, _ = self.pde.pair_input(input_data.features, input_data.features)
         y_pair_diff = self.pde.pair_output_difference(self.target, self.target, self.num_classes)
+        self.base_model.fit(X_pair, y_pair_diff)
         return self

@@ -256,7 +268,7 @@ def f(predictions_proba_similarity: pd.Series) -> pd.Series:
         # without this normalization it should work for multiclass-multilabel
         if self.proba_aggregate_method == 'norm':
             tests_classes_likelihood_np = tests_classes_likelihood_np.values \
-                / tests_classes_likelihood_np.values.sum(axis=-1)[:, np.newaxis]
+                                          / tests_classes_likelihood_np.values.sum(axis=-1)[:, np.newaxis]
         elif self.proba_aggregate_method == 'softmax':
             tests_classes_likelihood_np = softmax(tests_classes_likelihood_np, axis=-1)
         return tests_classes_likelihood_np
@@ -312,7 +324,7 @@ def _abstract_predict(self,
             monoid=[input_data.features, self.use_prior]).either(
             left_function=lambda features: self.__predict_without_prior(features, sample_weight),
             right_function=lambda features: self.__predict_with_prior(features, sample_weight))
-        return self.pde.predict(predict_output, output_mode)
+        return self.pde.predict(predict_output, output_mode, self.target_start_zero)

     def predict(self,
                 input_data: InputData,
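A short worked example of what the new min_label_zero flag corrects, assuming the columns of y_prob are ordered by class: np.argmax always returns 0-based column indices, so when _check_target finds that the training target does not start at 0 the predicted labels are shifted by one.

    import numpy as np

    # Probabilities for two samples over three classes (illustrative values).
    y_prob = np.array([[0.2, 0.7, 0.1],
                       [0.6, 0.3, 0.1]])
    labels = np.argmax(y_prob, axis=1)[..., np.newaxis]  # [[1], [0]] -> classes counted from 0
    labels_from_one = labels + 1                         # [[2], [1]] when the target starts at 1
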
diff --git a/fedot_ind/core/repository/model_repository.py b/fedot_ind/core/repository/model_repository.py
index f4909c456..69189b7cc 100644
--- a/fedot_ind/core/repository/model_repository.py
+++ b/fedot_ind/core/repository/model_repository.py
@@ -45,7 +45,6 @@
 from fedot_ind.core.models.detection.custom.stat_detector import StatisticalDetector
 from fedot_ind.core.models.detection.probalistic.kalman import UnscentedKalmanFilter
 from fedot_ind.core.models.detection.subspaces.sst import SingularSpectrumTransformation
-from fedot_ind.core.models.manifold.riemann_embeding import RiemannExtractor
 from fedot_ind.core.models.nn.network_impl.deep_tcn import TCNModel
 from fedot_ind.core.models.nn.network_impl.deepar import DeepAR
 from fedot_ind.core.models.nn.network_impl.dummy_nn import DummyOverComplicatedNeuralNetwork
@@ -57,14 +56,16 @@
 from fedot_ind.core.models.nn.network_impl.resnet import ResNetModel
 from fedot_ind.core.models.nn.network_impl.tst import TSTModel
 from fedot_ind.core.models.pdl.pairwise_model import PairwiseDifferenceClassifier, PairwiseDifferenceRegressor
-from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
-from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor
-from fedot_ind.core.models.topological.topological_extractor import TopologicalExtractor
 from fedot_ind.core.models.ts_forecasting.glm import GLMIndustrial
 from fedot_ind.core.operation.filtration.channel_filtration import ChannelCentroidFilter
 from fedot_ind.core.operation.transformation.basis.eigen_basis import EigenBasisImplementation
 from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation
 from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation
+from fedot_ind.core.operation.transformation.representation.manifold.riemann_embeding import RiemannExtractor
+from fedot_ind.core.operation.transformation.representation.recurrence.reccurence_extractor import RecurrenceExtractor
+from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor
+from fedot_ind.core.operation.transformation.representation.topological.topological_extractor import \
+    TopologicalExtractor
 from fedot_ind.core.repository.excluded import EXCLUDED_OPERATION_MUTATION, TEMPORARY_EXCLUDED
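For downstream code, the relocation above means the extractors are no longer importable from fedot_ind.core.models; the replacement paths used throughout this diff look like the following (only the statistical extractor is shown):

    # Old path, removed in this change:
    # from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor
    # New path:
    from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor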