diff --git a/benchmark/abstract_bench.py b/benchmark/abstract_bench.py
index d29e6c4ce..cf991c6af 100644
--- a/benchmark/abstract_bench.py
+++ b/benchmark/abstract_bench.py
@@ -58,6 +58,8 @@ def evaluate_loop(self, dataset, experiment_setup: dict = None):
         matplotlib.use('TkAgg')
         train_data, test_data = DataLoader(dataset_name=dataset).load_data()
         experiment_setup['output_folder'] = experiment_setup['output_folder'] + f'/{dataset}'
+        if 'tuning_params' in experiment_setup.keys():
+            del experiment_setup['tuning_params']
         model = FedotIndustrial(**experiment_setup)
         model.fit(train_data)
         prediction = model.predict(test_data)
@@ -71,6 +73,19 @@ def evaluate_loop(self, dataset, experiment_setup: dict = None):
             print('No_visualisation')
         gc.collect()
         return prediction, model.predict_data.target
+
+    def finetune_loop(self, dataset, experiment_setup: dict = None):
+        train_data, test_data = DataLoader(dataset_name=dataset).load_data()
+        experiment_setup['output_folder'] = experiment_setup['output_folder'] + f'/{dataset}'
+        tuning_params = experiment_setup['tuning_params']
+        del experiment_setup['tuning_params']
+        model = FedotIndustrial(**experiment_setup)
+        model.load(path=experiment_setup['output_folder'] + '/0_pipeline_saved')
+        model.finetune(train_data, tuning_params=tuning_params)
+        prediction = model.finetune_predict(test_data)
+        gc.collect()
+        return prediction, model.predict_data.target
+
     def collect_results(self, output_dir):
         """Collect the results of the benchmark.

diff --git a/benchmark/benchmark_TSC.py b/benchmark/benchmark_TSC.py
index 26027fab9..34e4542cc 100644
--- a/benchmark/benchmark_TSC.py
+++ b/benchmark/benchmark_TSC.py
@@ -55,7 +55,8 @@ def run(self):
                 metric = Accuracy(target, prediction).metric()
                 metric_dict.update({dataset_name: metric})
                 basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
-                dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}', 'metrics_report.csv')
+                dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
+                                            'metrics_report.csv')
                 basic_results.to_csv(dataset_path)
             except Exception:
                 print('Skip dataset')
@@ -64,6 +65,29 @@ def run(self):
         basic_results.to_csv(basic_path)
         self.logger.info("Benchmark test finished")
 
+    def finetune(self):
+        self.logger.info('Benchmark finetune started')
+        for dataset_name in self.custom_datasets:
+            try:
+                composed_model_path = PROJECT_PATH + self.path_to_save + f'/{dataset_name}' + '/0_pipeline_saved'
+                if os.path.isdir(composed_model_path):
+                    self.experiment_setup['output_folder'] = PROJECT_PATH + self.path_to_save
+                    experiment_setup = deepcopy(self.experiment_setup)
+                    prediction, target = self.finetune_loop(dataset_name, experiment_setup)
+                    metric = Accuracy(target, prediction).metric()
+                    dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
+                                                'metrics_report.csv')
+                    fedot_results = pd.read_csv(dataset_path, index_col=0)
+                    fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = metric
+
+                    fedot_results.to_csv(dataset_path)
+                else:
+                    print(f"No composed model for dataset - {dataset_name}")
+            except Exception:
+                print('Skip dataset')
+            gc.collect()
+        self.logger.info("Benchmark finetune finished")
+
     def load_local_basic_results(self, path: str = None):
         if path is None:
             path = PROJECT_PATH + self.path_to_result
@@ -83,4 +107,3 @@ def load_web_results(self):
         sota_results = get_estimator_results(estimators=sota_estimators['classification'].values.tolist())
         sota_results_df = pd.DataFrame(sota_results)
         return sota_results_df
-
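
For reviewers, a minimal usage sketch of the new fine-tuning pass. The class name `BenchmarkTSC` and the setup keys other than 'tuning_params' are assumptions inferred from this patch, not verbatim repository code; `run()` must already have saved a '0_pipeline_saved' pipeline per dataset before `finetune()` can reload it.

    # Hedged sketch: BenchmarkTSC and the non-'tuning_params' keys are assumed.
    from benchmark.benchmark_TSC import BenchmarkTSC

    experiment_setup = {'problem': 'classification',
                        'output_folder': './results',
                        'tuning_params': {'tuning_timeout': 10,
                                          'tuning_iterations': 1000,
                                          'tuning_early_stop': 50}}
    benchmark = BenchmarkTSC(experiment_setup=experiment_setup,
                             custom_datasets=['GunPoint'],
                             use_small_datasets=True)
    benchmark.run()       # composes pipelines; evaluate_loop() strips 'tuning_params'
    benchmark.finetune()  # reloads each saved '0_pipeline_saved' and tunes it
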
diff --git a/examples/benchmark/time_series_uni_clf_benchmark.py b/examples/benchmark/time_series_uni_clf_benchmark.py
index 7d91dfb14..1766120c1 100644
--- a/examples/benchmark/time_series_uni_clf_benchmark.py
+++ b/examples/benchmark/time_series_uni_clf_benchmark.py
@@ -27,6 +27,9 @@
                     'initial_assumption': None,
                     'max_pipeline_fit_time': 10,
                     'with_tuning': False,
+                    'tuning_params': {'tuning_timeout': 10,
+                                      'tuning_iterations': 1000,
+                                      'tuning_early_stop': 50},
                     'early_stopping_iterations': 5,
                     'early_stopping_timeout': 90,
                     'optimizer': IndustrialEvoOptimizer}
@@ -36,41 +39,41 @@
     custom_datasets=[
         # "Beef",
         # "BeetleFly",
-        "BirdChicken",
-        "BME",
-        "Car",
-        "CBF",
-        "Chinatown",
-        "ChlorineConcentration",
-        "CinCECGTorso",
-        "Coffee",
-        "Computers",
-        "CricketX",
-        "CricketY",
-        "CricketZ",
-        "Crop",
-        "DiatomSizeReduction",
-        "DistalPhalanxOutlineCorrect",
-        "DistalPhalanxOutlineAgeGroup",
-        "DistalPhalanxTW",
-        "Earthquakes",
-        "ECG200",
-        "ECG5000",
-        "ECGFiveDays",
-        "ElectricDevices",
-        "EOGHorizontalSignal",
-        "EOGVerticalSignal",
-        "EthanolLevel",
-        "FaceAll",
-        "FaceFour",
-        "FacesUCR",
-        "FiftyWords",
-        "Fish",
-        "FordA",
-        "FordB",
-        "FreezerRegularTrain",
-        "FreezerSmallTrain",
-        "Fungi",
+        # "BirdChicken",
+        # "BME",
+        # "Car",
+        # "CBF",
+        # "Chinatown",
+        # "ChlorineConcentration",
+        # "CinCECGTorso",
+        # "Coffee",
+        # "Computers",
+        # "CricketX",
+        # "CricketY",
+        # "CricketZ",
+        # "Crop",
+        # "DiatomSizeReduction",
+        # "DistalPhalanxOutlineCorrect",
+        # "DistalPhalanxOutlineAgeGroup",
+        # "DistalPhalanxTW",
+        # "Earthquakes",
+        # "ECG200",
+        # "ECG5000",
+        # "ECGFiveDays",
+        # "ElectricDevices",
+        # "EOGHorizontalSignal",
+        # "EOGVerticalSignal",
+        # "EthanolLevel",
+        # "FaceAll",
+        # "FaceFour",
+        # "FacesUCR",
+        # "FiftyWords",
+        # "Fish",
+        # "FordA",
+        # "FordB",
+        # "FreezerRegularTrain",
+        # "FreezerSmallTrain",
+        # "Fungi",
         "GunPoint",
         "GunPointAgeSpan",
         "GunPointMaleVersusFemale",
@@ -147,3 +150,4 @@
     ],
     use_small_datasets=True)
 benchmark.run()
+# benchmark.finetune()
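
The same setup dictionary can also be exercised outside the benchmark harness. A hedged sketch mirroring what evaluate_loop()/finetune_loop() do above; `train_data`/`test_data` are assumed to come from DataLoader as in abstract_bench.py, and only methods introduced in this patch are used:

    from copy import deepcopy

    setup = deepcopy(experiment_setup)
    tuning_params = setup.pop('tuning_params')  # FedotIndustrial(**setup) must not see this key
    model = FedotIndustrial(**setup)
    model.fit(train_data)
    model.load(path=setup['output_folder'] + '/0_pipeline_saved')
    model.finetune(train_data, tuning_params=tuning_params)
    labels = model.finetune_predict(test_data)
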
diff --git a/fedot_ind/api/main.py b/fedot_ind/api/main.py
index f2565b05b..a6e959381 100644
--- a/fedot_ind/api/main.py
+++ b/fedot_ind/api/main.py
@@ -1,4 +1,5 @@
 import logging
+from functools import partial
 from pathlib import Path
 from typing import Tuple, Union
 
@@ -7,7 +8,10 @@
 from fedot.api.main import Fedot
 from fedot.core.pipelines.pipeline import Pipeline
 from fedot.core.pipelines.pipeline_builder import PipelineBuilder
-
+from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
+from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
+from golem.core.tuning.simultaneous import SimultaneousTuner
+from golem.core.tuning.sequential import SequentialTuner
 from fedot_ind.api.utils.checkers_collections import DataCheck
 from fedot_ind.api.utils.path_lib import default_path_to_save_results
 from fedot_ind.core.operation.transformation.splitter import TSTransformer
@@ -98,7 +102,6 @@ def _preprocessing_strategy(self, input_data):
         self.logger.info(f'Dataset size before preprocessing - {input_data.features.shape}')
         self.logger.info('PCA transformation was applied to input data due to dataset size')
         if len(input_data.features.shape) == 3:
-            # self.preprocessing_model = PipelineBuilder().add_node('eigen_basis', params={'tensor_approximation': True}).build()
             self.preprocessing_model = PipelineBuilder().add_node('pca', params={'n_components': 0.9}).build()
         else:
             self.preprocessing_model = PipelineBuilder().add_node('pca', params={'n_components': 0.9}).build()
@@ -161,6 +164,48 @@ def predict_proba(self, predict_data, **kwargs) -> np.ndarray:
         self.logger.info(f'Test Dataset size after preprocessing - {self.predict_data.features.shape}')
         return self.solver.predict_proba(self.predict_data)
 
+    def finetune(self, train_data, tuning_params):
+        """
+        Method to fine-tune the hyperparameters of an already composed pipeline.
+
+        Args:
+            train_data: raw train data
+            tuning_params: dict with 'tuning_timeout', 'tuning_iterations'
+                and 'tuning_early_stop' keys
+
+        """
+        train_data = DataCheck(input_data=train_data, task=self.config_dict['problem']).check_input_data()
+        if train_data.num_classes > 2:
+            metric = ClassificationMetricsEnum.f1
+        else:
+            metric = ClassificationMetricsEnum.accuracy
+        # alternative: tuning_method = partial(SequentialTuner, inverse_node_order=True)
+        tuning_method = SimultaneousTuner
+        pipeline_tuner = TunerBuilder(train_data.task) \
+            .with_tuner(tuning_method) \
+            .with_metric(metric) \
+            .with_timeout(tuning_params['tuning_timeout']) \
+            .with_early_stopping_rounds(tuning_params['tuning_early_stop']) \
+            .with_iterations(tuning_params['tuning_iterations']) \
+            .build(train_data)
+        self.current_pipeline = pipeline_tuner.tune(self.current_pipeline)
+        self.current_pipeline.fit(train_data)
+
+    def finetune_predict(self, test_data) -> np.ndarray:
+        """
+        Method to obtain label predictions from the fine-tuned pipeline.
+
+        Args:
+            test_data: raw test data
+
+        Returns:
+            the array with predicted labels
+
+        """
+        self.predict_data = DataCheck(input_data=test_data, task=self.config_dict['problem']).check_input_data()
+        return self.current_pipeline.predict(self.predict_data, 'labels').predict
+
     def get_metrics(self, **kwargs) -> dict:
         """
         Method to obtain Gets quality metrics
@@ -210,7 +255,8 @@ def load(self, path):
             path (str): path to the model
 
         """
-        raise NotImplementedError()
+        self.current_pipeline = Pipeline(use_input_preprocessing=self.solver.params.get('use_input_preprocessing'))
+        self.current_pipeline.load(path)
 
     def save_optimization_history(self, **kwargs):
         """Plot prediction of the model"""

diff --git a/fedot_ind/core/models/base_extractor.py b/fedot_ind/core/models/base_extractor.py
index 2f9ca725f..e17d077a2 100644
--- a/fedot_ind/core/models/base_extractor.py
+++ b/fedot_ind/core/models/base_extractor.py
@@ -106,7 +106,7 @@ def get_statistical_features(self,
         for method in list_of_methods:
             try:
-                features.append(method[1](time_series))
+                features.append(method[1](time_series).reshape(-1, 1))
                 names.append(method[0])
             except ValueError:
                 continue

diff --git a/fedot_ind/core/models/signal/signal_extractor.py b/fedot_ind/core/models/signal/signal_extractor.py
index ac6f3dd9f..3f4cedfcd 100644
--- a/fedot_ind/core/models/signal/signal_extractor.py
+++ b/fedot_ind/core/models/signal/signal_extractor.py
@@ -24,7 +24,7 @@ def __init__(self, params: Optional[OperationParameters] = None):
 
     def _transform(self, input_data: InputData) -> np.array:
         wavelet_basis = self.wavelet_basis({'n_components': self.n_components,
-                                            'wavelet': self.wavelet})
+                                            'wavelet':self.wavelet})
         transformed_features = wavelet_basis.transform(input_data)
         predict = self._clean_predict(transformed_features.predict)
         return predict
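
For reference, the tuning flow inside finetune() in isolation. A hedged standalone sketch assuming a composed Pipeline `pipeline` and a classification InputData `train_data`; the chain mirrors the patch, where SimultaneousTuner tunes all node hyperparameters in one search and the commented SequentialTuner alternative would walk the nodes one by one:

    from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
    from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
    from golem.core.tuning.simultaneous import SimultaneousTuner

    # `pipeline` and `train_data` are assumed; values match the example config above.
    tuner = (TunerBuilder(train_data.task)
             .with_tuner(SimultaneousTuner)
             .with_metric(ClassificationMetricsEnum.accuracy)
             .with_timeout(10)
             .with_early_stopping_rounds(50)
             .with_iterations(1000)
             .build(train_data))
    pipeline = tuner.tune(pipeline)
    pipeline.fit(train_data)
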
diff --git a/fedot_ind/core/operation/transformation/basis/abstract_basis.py b/fedot_ind/core/operation/transformation/basis/abstract_basis.py
index e291ce35c..3dbb17434 100644
--- a/fedot_ind/core/operation/transformation/basis/abstract_basis.py
+++ b/fedot_ind/core/operation/transformation/basis/abstract_basis.py
@@ -77,9 +77,10 @@ def _transform(self, input_data: Union[InputData, pd.DataFrame]) -> np.array:
             features = np.array(ListMonad(*input_data.features.tolist()).value)
         else:
             features = np.array(ListMonad(*input_data.tolist()).value)
-        # features = np.array([series[~np.isnan(series)] for series in features])
         if len(features.shape) == 2 and features.shape[1] == 1:
             features = features.reshape(1, -1)
+        elif len(features.shape) == 3 and features.shape[1] == 1:
+            features = features.squeeze()
         parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs")
         v = parallel(delayed(self._transform_one_sample)(sample) for sample in features)

diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py
index 44b75107e..80578f2df 100644
--- a/fedot_ind/core/repository/initializer_industrial_models.py
+++ b/fedot_ind/core/repository/initializer_industrial_models.py
@@ -3,10 +3,16 @@
 from enum import Enum
 from typing import Sequence
 from random import choice, sample
+from typing import List, Iterable, Union, Optional
+
+import numpy as np
+
+from fedot.core.data.array_utilities import atleast_4d
 from fedot.api.api_utils.api_composer import ApiComposer
 from fedot.api.api_utils.api_params_repository import ApiParamsRepository
 from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation
+from fedot.core.data.merge.data_merger import ImageDataMerger
 from fedot.core.pipelines.adapters import PipelineAdapter
 from fedot.core.pipelines.node import PipelineNode
 from fedot.core.pipelines.pipeline import Pipeline
@@ -309,6 +315,46 @@ def has_no_data_flow_conflicts_in_industrial_pipeline(pipeline: Pipeline):
     return True
 
 
+def preprocess_predicts(*args) -> List[np.array]:
+    predicts = args[1]
+    if len(predicts[0].shape) <= 3:
+        return predicts
+    else:
+        reshaped_predicts = list(map(atleast_4d, predicts))
+
+        # Check that all images share the same size
+        img_wh = [predict.shape[1:3] for predict in reshaped_predicts]
+        invalid_sizes = len(set(img_wh)) > 1  # Can merge only images of the same size
+        if invalid_sizes:
+            raise ValueError("Can't merge images of different sizes: " + str(img_wh))
+        return reshaped_predicts
+
+
+def merge_predicts(*args) -> np.array:
+    predicts = args[1]
+    predicts = [x.reshape(-1, 1) if len(x.shape) == 1 else x for x in predicts]
+
+    channel_shape, elem_shape = [(x.shape[1], x.shape[2]) if len(x.shape) > 2
+                                 else (1, x.shape[0]) for x in predicts][0]
+
+    channel_concat = [x.shape[1] == channel_shape if len(x.shape) > 2
+                      else 1 == channel_shape for x in predicts]
+
+    element_wise_concat = [x.shape[2] == elem_shape if len(x.shape) > 2
+                           else x.shape[1] == elem_shape for x in predicts]
+
+    if all(channel_concat) and all(element_wise_concat):
+        try:
+            return np.concatenate(predicts, axis=1)
+        except Exception:
+            return np.concatenate(predicts, axis=0)
+    elif not all(channel_concat) and not all(element_wise_concat):
+        prediction_2d = np.concatenate([x.reshape(x.shape[0], x.shape[1] * x.shape[2]) if len(x.shape) > 2
+                                        else x for x in predicts], axis=1)
+        return prediction_2d.reshape(prediction_2d.shape[0], 1, prediction_2d.shape[1])
+    else:
+        return np.concatenate(predicts, axis=1)
+
+
 class IndustrialModels:
     def __init__(self):
         self.industrial_data_operation_path = pathlib.Path(PROJECT_PATH, 'fedot_ind',
@@ -341,6 +387,8 @@ def setup_repository(self):
         setattr(PipelineSearchSpace, "get_parameters_dict", get_industrial_search_space)
         setattr(ApiParamsRepository, "_get_default_mutations", _get_default_industrial_mutations)
+        setattr(ImageDataMerger, "preprocess_predicts", preprocess_predicts)
+        setattr(ImageDataMerger, "merge_predicts", merge_predicts)
         class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
         MutationStrengthEnum = MutationStrengthEnumIndustrial
         # common_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
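
A toy illustration of the merge rules implemented above (numpy only; shapes follow the (n_samples, n_channels, n_elements) convention the merger assumes):

    import numpy as np

    a = np.ones((10, 2, 5))
    b = np.ones((10, 3, 5))   # same element length, different channel count
    # matching element lengths, mismatched channels -> final branch: axis=1 concat
    print(np.concatenate([a, b], axis=1).shape)   # (10, 5, 5)

    c = np.ones((10, 3, 4))   # channel count and element length both differ from `a`
    # fully mismatched -> flatten each to 2D, concat, restore a singleton channel axis
    flat = np.concatenate([x.reshape(x.shape[0], -1) for x in (a, c)], axis=1)
    print(flat.reshape(flat.shape[0], 1, flat.shape[1]).shape)   # (10, 1, 22)
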
"preprocess_predicts", preprocess_predicts) + setattr(ImageDataMerger, "merge_predicts", merge_predicts) class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) MutationStrengthEnum = MutationStrengthEnumIndustrial # common_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index 609e44ae5..f600b5d42 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -11,7 +11,7 @@ 'wavelet_basis': {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]}, 'wavelet': {'hyperopt-dist': hp.choice, - 'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}}, + 'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}}, 'fourier_basis': {'threshold': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [10000, 50000]}}, 'topological_extractor': @@ -26,8 +26,9 @@ # 'rec_metric': (hp.choice, [['chebyshev', 'cosine', 'euclidean', 'mahalanobis']]) }, 'signal_extractor': - {'wavelet': {'hyperopt-dist': hp.choice, - 'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}}, + {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]}, + 'wavelet': {'hyperopt-dist': hp.choice, + 'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}}, 'minirocket_extractor': {'num_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}}, 'patch_tst_model': @@ -320,6 +321,22 @@ def get_industrial_search_space(self): 'sampling-scope': [1, 365], 'type': 'discrete'} }, + 'mlp': { + 'hidden_layer_sizes': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[(256, 128, 64, 32), (1028, 512, 64,)]], + 'type': 'categorical'}, + 'activation': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['logistic', 'tanh', 'relu']], + 'type': 'categorical'}, + 'max_iter': {'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1000, 2000], + 'type': 'discrete'}, + 'learning_rate': {'hyperopt-dist': hp.choice, + 'sampling-scope': [['constant', 'adaptive']], + 'type': 'categorical'} + }, 'ar': { 'lag_1': { 'hyperopt-dist': hp.uniform,