diff --git a/.github/ISSUE_TEMPLATE/sweep-template.yml b/.github/ISSUE_TEMPLATE/sweep-template.yml deleted file mode 100644 index 9d687a3fb..000000000 --- a/.github/ISSUE_TEMPLATE/sweep-template.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Sweep Issue -title: 'Sweep: ' -description: For small bugs, features, refactors, and tests to be handled by Sweep, an AI-powered junior developer. -labels: sweep -body: - - type: textarea - id: description - attributes: - label: Details - description: Tell Sweep where and what to edit and provide enough context for a new developer to the codebase - placeholder: | - Unit Tests: Write unit tests for . Test each function in the file. Make sure to test edge cases. - Bugs: The bug might be in . Here are the logs: ... - Features: the new endpoint should use the ... class from because it contains ... logic. - Refactors: We are migrating this function to ... version because ... - - type: input - id: branch - attributes: - label: Branch - description: The branch to work off of (optional) - placeholder: | - main \ No newline at end of file diff --git a/.github/workflows/poetry_unit_test.yml b/.github/workflows/poetry_unit_test.yml index f0ad777d2..cb46640a4 100644 --- a/.github/workflows/poetry_unit_test.yml +++ b/.github/workflows/poetry_unit_test.yml @@ -13,7 +13,7 @@ jobs: timeout-minutes: 60 strategy: matrix: - python-version: [3.9, '3.10'] + python-version: [3.9, '3.10', '3.11'] steps: - uses: actions/checkout@v2 diff --git a/benchmark/benchmark_TSC.py b/benchmark/benchmark_TSC.py index 0f7edb5f1..4e4b8de2c 100644 --- a/benchmark/benchmark_TSC.py +++ b/benchmark/benchmark_TSC.py @@ -9,6 +9,7 @@ from benchmark.abstract_bench import AbstractBenchmark from fedot_ind.api.utils.path_lib import PROJECT_PATH +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker from fedot_ind.core.architecture.settings.computational import 
backend_methods as np from fedot_ind.core.metrics.metrics_implementation import Accuracy @@ -27,6 +28,7 @@ def __init__(self, self.logger = logging.getLogger(self.__class__.__name__) self.experiment_setup = experiment_setup + self.init_assumption = deepcopy(self.experiment_setup['initial_assumption']) self.multi_TSC = MULTI_CLF_BENCH self.uni_TSC = UNI_CLF_BENCH if custom_datasets is None: @@ -38,35 +40,50 @@ def __init__(self, self.custom_datasets = custom_datasets if use_small_datasets: - self.path_to_result = '/benchmark/results/time_series_uni_clf_comparasion.csv' - self.path_to_save = '/benchmark/results/ts_uni_classification' + self.path_to_result = 'time_series_uni_clf_comparasion.csv' + self.path_to_save = 'ts_uni_classification' else: - self.path_to_result = '/benchmark/results/time_series_multi_clf_comparasion.csv' - self.path_to_save = '/benchmark/results/ts_multi_classification' - self.results_picker = ResultsPicker( - path=os.path.abspath(self.output_dir)) + self.path_to_result = 'time_series_multi_clf_comparasion.csv' + self.path_to_save = 'ts_multi_classification' + self.output_dir = os.path.join(self.experiment_setup['output_folder'], self.path_to_save) + self.results_picker = ResultsPicker(path=os.path.abspath(self.output_dir)) + + def _run_model_versus_model(self, dataset_name, comparasion_dict): + approach_dict = {} + for approach in comparasion_dict.keys(): + result_dict = ApiTemplate(api_config=self.experiment_setup, + metric_list=self.experiment_setup['metric_names']). 
\ + eval(dataset=dataset_name, + initial_assumption=comparasion_dict[approach], + finetune=self.experiment_setup['finetune']) + metric = result_dict['metrics'][self.experiment_setup['metric']][0] + approach_dict.update({approach: metric}) + return approach_dict + + def _run_industrial_versus_sota(self, dataset_name): + experiment_setup = deepcopy(self.experiment_setup) + prediction, target = self.evaluate_loop(dataset_name, experiment_setup) + return Accuracy(target, prediction).metric() def run(self): self.logger.info('Benchmark test started') basic_results = self.load_local_basic_results() metric_dict = {} for dataset_name in self.custom_datasets: - experiment_setup = deepcopy(self.experiment_setup) - prediction, target = self.evaluate_loop( - dataset_name, experiment_setup) - metric = Accuracy(target, prediction).metric() - metric_dict.update({dataset_name: metric}) - basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric - dataset_path = os.path.join( - self.experiment_setup['output_folder'], - f'{dataset_name}', - 'metrics_report.csv') - basic_results.to_csv(dataset_path) - gc.collect() - basic_path = os.path.join( - self.experiment_setup['output_folder'], - 'comprasion_metrics_report.csv') - basic_results.to_csv(basic_path) + try: + if isinstance(self.init_assumption, dict): + model_name = list(self.init_assumption.keys()) + metric = self._run_model_versus_model(dataset_name, self.init_assumption) + else: + metric = self._run_industrial_versus_sota() + model_name = 'Fedot_Industrial' + metric_dict.update({dataset_name: metric}) + basic_results.loc[dataset_name, model_name] = metric + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + basic_results.to_csv(os.path.join(self.output_dir, self.path_to_result)) + except Exception: + self.logger.info(f"{dataset_name} problem with eval") self.logger.info("Benchmark test finished") def finetune(self): @@ -128,15 +145,14 @@ def finetune(self): def load_local_basic_results(self, path: str = 
None): if path is None: - path = PROJECT_PATH + self.path_to_result + path = os.path.join(self.output_dir, self.path_to_result) try: results = pd.read_csv(path, sep=',', index_col=0) - results = results.dropna(axis=1, how='all') - results = results.dropna(axis=0, how='all') + # results = results.fillna() + # results = results.dropna(axis=1, how='all') + # results = results.dropna(axis=0, how='all') except Exception: results = self.load_web_results() - self.experiment_setup['output_folder'] = PROJECT_PATH + \ - self.path_to_save return results else: return self.results_picker.run(get_metrics_df=True, add_info=True) diff --git a/benchmark/benchmark_TSER.py b/benchmark/benchmark_TSER.py index 61e985025..b5b7c170e 100644 --- a/benchmark/benchmark_TSER.py +++ b/benchmark/benchmark_TSER.py @@ -1,19 +1,20 @@ -from fedot_ind.core.repository.constanst_repository import MULTI_REG_BENCH -from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker -from benchmark.abstract_bench import AbstractBenchmark -from fedot_ind.core.metrics.metrics_implementation import RMSE -from fedot_ind.api.utils.path_lib import PROJECT_PATH -from fedot_ind.api.main import FedotIndustrial -from fedot.core.pipelines.pipeline import Pipeline -from fedot.core.pipelines.node import PipelineNode -import pandas as pd import logging import os from abc import ABC from copy import deepcopy import matplotlib +import pandas as pd +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline +from benchmark.abstract_bench import AbstractBenchmark +from fedot_ind.api.main import FedotIndustrial +from fedot_ind.api.utils.path_lib import PROJECT_PATH +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate +from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker +from fedot_ind.core.metrics.metrics_implementation import RMSE +from fedot_ind.core.repository.constanst_repository import 
MULTI_REG_BENCH from fedot_ind.tools.loader import DataLoader matplotlib.use('TkAgg') @@ -31,6 +32,7 @@ def __init__(self, self.logger = logging.getLogger(self.__class__.__name__) self.experiment_setup = experiment_setup + self.init_assumption = deepcopy(self.experiment_setup['initial_assumption']) self.monash_regression = MULTI_REG_BENCH if custom_datasets is None: self.custom_datasets = self.monash_regression @@ -40,26 +42,43 @@ def __init__(self, self.results_picker = ResultsPicker( path=os.path.abspath(self.output_dir)) + def _run_model_versus_model(self, dataset_name, comparasion_dict): + approach_dict = {} + for approach in comparasion_dict.keys(): + result_dict = ApiTemplate(api_config=self.experiment_setup, + metric_list=self.experiment_setup['metric_names']). \ + eval(dataset=dataset_name, + initial_assumption=comparasion_dict[approach], + finetune=self.experiment_setup['finetune']) + metric = result_dict['metrics'][self.experiment_setup['metric']][0] + approach_dict.update({approach: metric}) + return approach_dict + + def _run_industrial_versus_sota(self, dataset_name): + experiment_setup = deepcopy(self.experiment_setup) + prediction, target = self.evaluate_loop(dataset_name, experiment_setup) + return RMSE(target, prediction).metric() + def run(self): self.logger.info('Benchmark test started') basic_results = self.load_local_basic_results() metric_dict = {} for dataset_name in self.custom_datasets: - experiment_setup = deepcopy(self.experiment_setup) - prediction, target = self.evaluate_loop( - dataset_name, experiment_setup) - metric = RMSE(target, prediction).metric() - metric_dict.update({dataset_name: metric}) - basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric - dataset_path = os.path.join( - self.experiment_setup['output_folder'], - f'{dataset_name}', - 'metrics_report.csv') - basic_results.to_csv(dataset_path) - basic_path = os.path.join( - self.experiment_setup['output_folder'], - 'comprasion_metrics_report.csv') - 
basic_results.to_csv(basic_path) + try: + if isinstance(self.init_assumption, dict): + model_name = list(self.init_assumption.keys()) + metric = self._run_model_versus_model(dataset_name, self.init_assumption) + else: + metric = self._run_industrial_versus_sota(dataset_name) + model_name = 'Fedot_Industrial' + metric_dict.update({dataset_name: metric}) + basic_results.loc[dataset_name, model_name] = metric + basic_path = os.path.join(self.experiment_setup['output_folder']) + if not os.path.exists(basic_path): + os.makedirs(basic_path) + basic_results.to_csv(os.path.join(basic_path, 'comprasion_metrics_report.csv')) + except Exception: + self.logger.info(f"{dataset_name} problem with eval") self.logger.info("Benchmark test finished") def load_local_basic_results(self, path: str = None): diff --git a/examples/automl_example/api_example/advanced_example/__init__.py b/examples/__init__.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/__init__.py rename to examples/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/explainability/__init__.py b/examples/automl_example/__init__.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/explainability/__init__.py rename to examples/automl_example/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/random_sampling_example.py b/examples/automl_example/api_example/advanced_example/specific_strategy/random_sampling_example.py deleted file mode 100644 index 0a0ac1a4d..000000000 --- a/examples/automl_example/api_example/advanced_example/specific_strategy/random_sampling_example.py +++ /dev/null @@ -1,53 +0,0 @@ -import pickle - -import numpy as np - -from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate - -model_list = dict(logit=['logit'], rf=['rf'], xgboost=['xgboost']) -finetune = False -task = 'classification' -sampling_range = [0.01, 0.15, 0.3, 0.6] 
-sampling_algorithm = ['CUR', - 'Random'] - - -def create_big_dataset(): - train_X, test_X = np.load( - './examples/big_dataset/train_airlinescodrnaadult_fold0.npy'), np.load( - './examples/big_dataset/test_airlinescodrnaadult_fold0.npy') - train_y, test_y = np.load( - './examples/big_dataset/trainy_airlinescodrnaadult_fold0.npy'), np.load( - './examples/big_dataset/testy_airlinescodrnaadult_fold0.npy') - dataset_dict = dict(train_data=(train_X, train_y), - test_data=(test_X, test_y)) - return dataset_dict - - -if __name__ == "__main__": - results_of_experiments_dict = {} - dataset_dict = create_big_dataset() - # df = pd.read_pickle('./sampling_experiment.pkl') - for algo in sampling_algorithm: - api_config = dict( - problem=task, - metric='f1', - timeout=0.1, - with_tuning=False, - industrial_strategy='sampling_strategy', - industrial_strategy_params={ - 'industrial_task': task, - 'sampling_algorithm': algo, - 'sampling_range': sampling_range, - 'data_type': 'table'}, - logging_level=30) - algo_result = {} - for model_name, model in model_list.items(): - result_dict = ApiTemplate(api_config=api_config, - metric_list=('f1', 'accuracy')).eval(dataset=dataset_dict, - finetune=finetune, - initial_assumption=model) - algo_result.update({f'{algo}_{model_name}': result_dict['metrics']}) - results_of_experiments_dict.update({algo: algo_result}) - with open(f'sampling_experiment.pkl', 'wb') as f: - pickle.dump(results_of_experiments_dict, f) diff --git a/examples/automl_example/api_example/advanced_example/multimodal/__init__.py b/examples/automl_example/computer_vision/__init__.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/multimodal/__init__.py rename to examples/automl_example/computer_vision/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/__init__.py b/examples/automl_example/computer_vision/image_classification/__init__.py similarity index 100% rename from 
examples/automl_example/api_example/advanced_example/specific_strategy/__init__.py rename to examples/automl_example/computer_vision/image_classification/__init__.py diff --git a/examples/automl_example/api_example/computer_vision/image_classification/image_clf_example.py b/examples/automl_example/computer_vision/image_classification/image_clf_example.py similarity index 99% rename from examples/automl_example/api_example/computer_vision/image_classification/image_clf_example.py rename to examples/automl_example/computer_vision/image_classification/image_clf_example.py index 954113e54..1b4942267 100644 --- a/examples/automl_example/api_example/computer_vision/image_classification/image_clf_example.py +++ b/examples/automl_example/computer_vision/image_classification/image_clf_example.py @@ -1,4 +1,5 @@ import random + import matplotlib.pyplot as plt from torchvision.transforms import ToTensor, Resize, Compose diff --git a/examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py b/examples/automl_example/computer_vision/image_classification/mnist_lora_example.py similarity index 99% rename from examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py rename to examples/automl_example/computer_vision/image_classification/mnist_lora_example.py index fb3d3e31d..4c58d401a 100644 --- a/examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py +++ b/examples/automl_example/computer_vision/image_classification/mnist_lora_example.py @@ -1,14 +1,11 @@ -from fedot_ind.core.models.nn.network_modules.layers.lora import linear_layer_parameterization - import torch import torch.nn as nn import torch.nn.utils.parametrize as parametrize - import torchvision.datasets as datasets import torchvision.transforms as transforms - from tqdm import tqdm +from fedot_ind.core.models.nn.network_modules.layers.lora import linear_layer_parameterization # Make torch deterministic _ = 
torch.manual_seed(228) diff --git a/examples/automl_example/api_example/time_series/ts_anomaly_detection/__init__.py b/examples/automl_example/computer_vision/object_detection/__init__.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_anomaly_detection/__init__.py rename to examples/automl_example/computer_vision/object_detection/__init__.py diff --git a/examples/automl_example/api_example/computer_vision/object_detection/obj_rec_example.py b/examples/automl_example/computer_vision/object_detection/obj_rec_example.py similarity index 99% rename from examples/automl_example/api_example/computer_vision/object_detection/obj_rec_example.py rename to examples/automl_example/computer_vision/object_detection/obj_rec_example.py index 0c555c4ec..1dfaf5169 100644 --- a/examples/automl_example/api_example/computer_vision/object_detection/obj_rec_example.py +++ b/examples/automl_example/computer_vision/object_detection/obj_rec_example.py @@ -1,9 +1,10 @@ import os import random + import yaml +from fedot_ind.core.architecture.datasets.visualization import draw_sample_with_bboxes from fedot_ind.api.main import FedotIndustrial -from fedot_ind.core.architecture.datasets.visualization import draw_sample_with_bboxes DATASETS_PATH = os.path.abspath('Warp-D') TEST_IMAGE_FOLDER = 'Land-Use_Scene_Classification/images_train_test_val/test' diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/__init__.py b/examples/automl_example/custom_strategy/__init__.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_forecasting/__init__.py rename to examples/automl_example/custom_strategy/__init__.py diff --git a/examples/real_world_examples/eeg/__init__.py b/examples/automl_example/custom_strategy/big_data/__init__.py similarity index 100% rename from examples/real_world_examples/eeg/__init__.py rename to examples/automl_example/custom_strategy/big_data/__init__.py diff --git 
a/examples/automl_example/custom_strategy/big_data/big_dataset_utils.py b/examples/automl_example/custom_strategy/big_data/big_dataset_utils.py new file mode 100644 index 000000000..f02baf79b --- /dev/null +++ b/examples/automl_example/custom_strategy/big_data/big_dataset_utils.py @@ -0,0 +1,13 @@ +import numpy as np + + +def create_big_dataset(): + train_X, test_X = np.load( + 'big_dataset/train_airlinescodrnaadult_fold0.npy'), np.load( + 'big_dataset/test_airlinescodrnaadult_fold0.npy') + train_y, test_y = np.load( + 'big_dataset/trainy_airlinescodrnaadult_fold0.npy'), np.load( + 'big_dataset/testy_airlinescodrnaadult_fold0.npy') + dataset_dict = dict(train_data=(train_X, train_y), + test_data=(test_X, test_y)) + return dataset_dict diff --git a/examples/benchmark_example/time_series_multi_forecast_benchmark.py b/examples/automl_example/custom_strategy/big_data/dask_backend.py similarity index 100% rename from examples/benchmark_example/time_series_multi_forecast_benchmark.py rename to examples/automl_example/custom_strategy/big_data/dask_backend.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/federated_automl_example.py b/examples/automl_example/custom_strategy/big_data/federated_automl_example.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/specific_strategy/federated_automl_example.py rename to examples/automl_example/custom_strategy/big_data/federated_automl_example.py diff --git a/examples/automl_example/custom_strategy/big_data/random_sampling_example.py b/examples/automl_example/custom_strategy/big_data/random_sampling_example.py new file mode 100644 index 000000000..1bccde650 --- /dev/null +++ b/examples/automl_example/custom_strategy/big_data/random_sampling_example.py @@ -0,0 +1,36 @@ +from examples.automl_example.custom_strategy.big_data.big_dataset_utils import create_big_dataset +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate + 
+cur_params = {'rank': None} +sampling_algorithm = {'CUR': cur_params} + +if __name__ == "__main__": + dataset_dict = create_big_dataset() + finetune = False + metric_names = ('f1', 'accuracy') + api_config = dict(problem='classification', + metric='f1', + timeout=40, + pop_size=10, + early_stopping_iterations=10, + early_stopping_timeout=30, + optimizer_params={'mutation_agent': 'bandit', + 'mutation_strategy': 'growth_mutation_strategy'}, + with_tunig=False, + preset='classification_tabular', + industrial_strategy_params={'data_type': 'tensor', + 'learning_strategy': 'big_dataset', + 'sampling_strategy': sampling_algorithm + }, + n_jobs=-1, + logging_level=20) + + result_dict = ApiTemplate(api_config=api_config, + metric_list=metric_names).eval(dataset=dataset_dict, + finetune=finetune) + metrics = result_dict['metrics'] + metrics.to_csv('./metrics.csv') + hist = result_dict['industrial_model'].save_optimization_history(return_history=True) + result_dict['industrial_model'].vis_optimisation_history(hist) + result_dict['industrial_model'].save_best_model() + _ = 1 diff --git a/examples/tutorial/time_series/ts_forecasting/__init__.py b/examples/automl_example/custom_strategy/explainability/__init__.py similarity index 100% rename from examples/tutorial/time_series/ts_forecasting/__init__.py rename to examples/automl_example/custom_strategy/explainability/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/explainability/explainability.ipynb b/examples/automl_example/custom_strategy/explainability/explainability.ipynb similarity index 100% rename from examples/automl_example/api_example/advanced_example/explainability/explainability.ipynb rename to examples/automl_example/custom_strategy/explainability/explainability.ipynb diff --git a/examples/automl_example/api_example/advanced_example/explainability/optimisation_history_visualisation.py b/examples/automl_example/custom_strategy/explainability/optimisation_history_visualisation.py 
similarity index 100% rename from examples/automl_example/api_example/advanced_example/explainability/optimisation_history_visualisation.py rename to examples/automl_example/custom_strategy/explainability/optimisation_history_visualisation.py diff --git a/fedot_ind/core/models/algebra/__init__.py b/examples/automl_example/custom_strategy/multimodal/__init__.py similarity index 100% rename from fedot_ind/core/models/algebra/__init__.py rename to examples/automl_example/custom_strategy/multimodal/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/multimodal/multimodal.py b/examples/automl_example/custom_strategy/multimodal/multimodal.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/multimodal/multimodal.py rename to examples/automl_example/custom_strategy/multimodal/multimodal.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py b/examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py similarity index 99% rename from examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py rename to examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py index 7a7a4255a..febccf1e5 100644 --- a/examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py +++ b/examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py @@ -1,7 +1,8 @@ -from fedot_ind.api.main import FedotIndustrial import torchvision.datasets as datasets import torchvision.transforms as transforms +from fedot_ind.api.main import FedotIndustrial + transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) diff --git a/fedot_ind/core/models/manifold/__init__.py b/examples/automl_example/custom_strategy/specific_strategy/__init__.py similarity index 100% rename from fedot_ind/core/models/manifold/__init__.py rename to 
examples/automl_example/custom_strategy/specific_strategy/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/kernel_ensemble_example.py b/examples/automl_example/custom_strategy/specific_strategy/kernel_ensemble_example.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/specific_strategy/kernel_ensemble_example.py rename to examples/automl_example/custom_strategy/specific_strategy/kernel_ensemble_example.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/probability_calibration_example.py b/examples/automl_example/custom_strategy/specific_strategy/probability_calibration_example.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/specific_strategy/probability_calibration_example.py rename to examples/automl_example/custom_strategy/specific_strategy/probability_calibration_example.py diff --git a/fedot_ind/core/models/quantile/__init__.py b/examples/automl_example/time_series/__init__.py similarity index 100% rename from fedot_ind/core/models/quantile/__init__.py rename to examples/automl_example/time_series/__init__.py diff --git a/fedot_ind/core/models/recurrence/__init__.py b/examples/automl_example/time_series/ts_anomaly_detection/__init__.py similarity index 100% rename from fedot_ind/core/models/recurrence/__init__.py rename to examples/automl_example/time_series/ts_anomaly_detection/__init__.py diff --git a/examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py b/examples/automl_example/time_series/ts_anomaly_detection/custom_liman_example.py similarity index 97% rename from examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py rename to examples/automl_example/time_series/ts_anomaly_detection/custom_liman_example.py index 21e83d5af..caef4407f 100644 --- 
a/examples/automl_example/api_example/time_series/ts_anomaly_detection/custom_liman_example.py +++ b/examples/automl_example/time_series/ts_anomaly_detection/custom_liman_example.py @@ -8,7 +8,7 @@ from fedot_ind.api.utils.checkers_collections import DataCheck from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate -from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor +from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor from fedot_ind.core.repository.constanst_repository import FEDOT_TASK from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels diff --git a/examples/automl_example/api_example/time_series/ts_anomaly_detection/ts_anomaly_detection_example.py b/examples/automl_example/time_series/ts_anomaly_detection/ts_anomaly_detection_example.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_anomaly_detection/ts_anomaly_detection_example.py rename to examples/automl_example/time_series/ts_anomaly_detection/ts_anomaly_detection_example.py diff --git a/fedot_ind/core/models/tabular/__init__.py b/examples/automl_example/time_series/ts_classification/__init__.py similarity index 100% rename from fedot_ind/core/models/tabular/__init__.py rename to examples/automl_example/time_series/ts_classification/__init__.py diff --git a/examples/automl_example/time_series/ts_classification/pdl_example.py b/examples/automl_example/time_series/ts_classification/pdl_example.py new file mode 100644 index 000000000..af91eef7a --- /dev/null +++ b/examples/automl_example/time_series/ts_classification/pdl_example.py @@ -0,0 +1,25 @@ +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate + +if __name__ == "__main__": + dataset_name = 'Lightning7' + finetune = True + metric_names = ('f1', 'accuracy', 'precision', 'roc_auc') + 
api_config = dict(problem='classification', + metric='f1', + timeout=5, + pop_size=10, + with_tunig=False, + n_jobs=2, + logging_level=20) + init_assumption_pdl = ['quantile_extractor', 'pdl_clf'] + init_assumption_rf = ['quantile_extractor', 'rf'] + comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) + for approach in comparasion_dict.keys(): + result_dict = ApiTemplate(api_config=api_config, + metric_list=metric_names).eval(dataset=dataset_name, + initial_assumption=comparasion_dict[approach], + finetune=finetune) + metrics = result_dict['metrics'] + print(f'Approach - {approach}. Metrics - {metrics}') + _ = 1 diff --git a/examples/automl_example/time_series/ts_classification/tmp.py b/examples/automl_example/time_series/ts_classification/tmp.py new file mode 100644 index 000000000..e49dbd454 --- /dev/null +++ b/examples/automl_example/time_series/ts_classification/tmp.py @@ -0,0 +1,31 @@ +import numpy as np +from pdll import PairwiseDifferenceClassifier +from sklearn.datasets import make_blobs +from sklearn.ensemble import RandomForestClassifier + + +def multiclass_classification(): + # Set the random seed for reproducibility + np.random.seed(53) + + # Define the number of data points and features + n_samples = 10 + n_features = 2 + n_classes = 3 + + # Generate random data with 2 features, 10 points, and 3 classes + X, y = make_blobs(n_samples=n_samples, n_features=n_features, centers=n_classes, random_state=0) + + base = RandomForestClassifier(class_weight="balanced", random_state=0) + pdc = PairwiseDifferenceClassifier(estimator=base) + pdc.fit(X, y) + print('score:', pdc.score(X, y)) + + pdc.predict(X) + pdc.predict_proba(X) + + assert pdc.score(X, y) == 1.0 + + +if __name__ == "__main__": + multiclass_classification() diff --git a/examples/automl_example/api_example/time_series/ts_classification/ts_classification_example.py b/examples/automl_example/time_series/ts_classification/ts_classification_example.py similarity 
index 100% rename from examples/automl_example/api_example/time_series/ts_classification/ts_classification_example.py rename to examples/automl_example/time_series/ts_classification/ts_classification_example.py diff --git a/fedot_ind/core/models/topological/__init__.py b/examples/automl_example/time_series/ts_forecasting/__init__.py similarity index 100% rename from fedot_ind/core/models/topological/__init__.py rename to examples/automl_example/time_series/ts_forecasting/__init__.py diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/forecasting_strategy_example.py b/examples/automl_example/time_series/ts_forecasting/forecasting_strategy_example.py similarity index 100% rename from examples/automl_example/api_example/advanced_example/specific_strategy/forecasting_strategy_example.py rename to examples/automl_example/time_series/ts_forecasting/forecasting_strategy_example.py diff --git a/examples/real_world_examples/kaggle/automl/log.log b/examples/automl_example/time_series/ts_forecasting/m4_analiysis/__init__.py similarity index 100% rename from examples/real_world_examples/kaggle/automl/log.log rename to examples/automl_example/time_series/ts_forecasting/m4_analiysis/__init__.py diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/m4_analiysis/forecasting_analysis.py b/examples/automl_example/time_series/ts_forecasting/m4_analiysis/forecasting_analysis.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_forecasting/m4_analiysis/forecasting_analysis.py rename to examples/automl_example/time_series/ts_forecasting/m4_analiysis/forecasting_analysis.py diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/m4_analiysis/ts_forecasting_m4_benchmark.py b/examples/automl_example/time_series/ts_forecasting/m4_analiysis/ts_forecasting_m4_benchmark.py similarity index 100% rename from 
examples/automl_example/api_example/time_series/ts_forecasting/m4_analiysis/ts_forecasting_m4_benchmark.py rename to examples/automl_example/time_series/ts_forecasting/m4_analiysis/ts_forecasting_m4_benchmark.py diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_deepar_examle.ipynb b/examples/automl_example/time_series/ts_forecasting/ts_forecasting_deepar_examle.ipynb similarity index 100% rename from examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_deepar_examle.ipynb rename to examples/automl_example/time_series/ts_forecasting/ts_forecasting_deepar_examle.ipynb diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py b/examples/automl_example/time_series/ts_forecasting/ts_forecasting_example.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_example.py rename to examples/automl_example/time_series/ts_forecasting/ts_forecasting_example.py diff --git a/examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_exogen.py b/examples/automl_example/time_series/ts_forecasting/ts_forecasting_exogen.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_forecasting/ts_forecasting_exogen.py rename to examples/automl_example/time_series/ts_forecasting/ts_forecasting_exogen.py diff --git a/examples/automl_example/time_series/ts_regression/__init__.py b/examples/automl_example/time_series/ts_regression/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/automl_example/time_series/ts_regression/pdl_example.py b/examples/automl_example/time_series/ts_regression/pdl_example.py new file mode 100644 index 000000000..61b1eda2a --- /dev/null +++ b/examples/automl_example/time_series/ts_regression/pdl_example.py @@ -0,0 +1,22 @@ +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate + +if __name__ == 
"__main__": + dataset_name = 'AppliancesEnergy' # BeijingPM10Quality + finetune = False + api_config = dict(problem='regression', + metric='rmse', + timeout=0.1, + n_jobs=2, + logging_level=20) + metric_names = ('r2', 'rmse', 'mae') + init_assumption_pdl = ['quantile_extractor', 'pdl_reg'] + init_assumption_rf = ['quantile_extractor', 'treg'] + comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) + for approach in comparasion_dict.keys(): + result_dict = ApiTemplate(api_config=api_config, + metric_list=metric_names).eval(dataset=dataset_name, + initial_assumption=comparasion_dict[approach], + finetune=finetune) + metrics = result_dict['metrics'] + print(f'Approach - {approach}. Metrics - {metrics}') diff --git a/examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py b/examples/automl_example/time_series/ts_regression/ts_regression_example.py similarity index 100% rename from examples/automl_example/api_example/time_series/ts_regression/ts_regression_example.py rename to examples/automl_example/time_series/ts_regression/ts_regression_example.py diff --git a/examples/real_world_examples/__init__.py b/examples/real_world_examples/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/__init__.py b/examples/real_world_examples/benchmark_example/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/benchmark_example/analysis of results/analysis_multi_clf.ipynb b/examples/real_world_examples/benchmark_example/analysis of results/analysis_multi_clf.ipynb similarity index 100% rename from examples/benchmark_example/analysis of results/analysis_multi_clf.ipynb rename to examples/real_world_examples/benchmark_example/analysis of results/analysis_multi_clf.ipynb diff --git a/examples/benchmark_example/analysis of results/analysis_regr.ipynb b/examples/real_world_examples/benchmark_example/analysis of 
results/analysis_regr.ipynb similarity index 100% rename from examples/benchmark_example/analysis of results/analysis_regr.ipynb rename to examples/real_world_examples/benchmark_example/analysis of results/analysis_regr.ipynb diff --git a/examples/benchmark_example/analysis of results/analysis_uni_clf.ipynb b/examples/real_world_examples/benchmark_example/analysis of results/analysis_uni_clf.ipynb similarity index 100% rename from examples/benchmark_example/analysis of results/analysis_uni_clf.ipynb rename to examples/real_world_examples/benchmark_example/analysis of results/analysis_uni_clf.ipynb diff --git a/examples/real_world_examples/benchmark_example/classification/PDL_multi.py b/examples/real_world_examples/benchmark_example/classification/PDL_multi.py new file mode 100644 index 000000000..ca8b628fc --- /dev/null +++ b/examples/real_world_examples/benchmark_example/classification/PDL_multi.py @@ -0,0 +1,24 @@ +from benchmark.benchmark_TSC import BenchmarkTSC + +init_assumption_pdl = ['quantile_extractor', 'pdl_clf'] +init_assumption_rf = ['quantile_extractor', 'rf'] +comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) +experiment_setup = { + 'problem': 'classification', + 'metric': 'accuracy', + 'timeout': 2.0, + 'num_of_generations': 15, + 'pop_size': 10, + 'metric_names': ('f1', 'accuracy'), + 'logging_level': 10, + 'n_jobs': -1, + 'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/', + 'initial_assumption': comparasion_dict, + 'finetune': True} + +if __name__ == "__main__": + benchmark = BenchmarkTSC(experiment_setup=experiment_setup, + use_small_datasets=False) + benchmark.run() + _ = 1 diff --git a/examples/real_world_examples/benchmark_example/classification/PDL_uni.py b/examples/real_world_examples/benchmark_example/classification/PDL_uni.py new file mode 100644 index 000000000..d9c81ddf8 --- /dev/null +++ b/examples/real_world_examples/benchmark_example/classification/PDL_uni.py @@ 
-0,0 +1,24 @@ +from benchmark.benchmark_TSC import BenchmarkTSC + +init_assumption_pdl = ['quantile_extractor', 'pdl_clf'] +init_assumption_rf = ['quantile_extractor', 'rf'] +comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) +experiment_setup = { + 'problem': 'classification', + 'metric': 'accuracy', + 'timeout': 2.0, + 'num_of_generations': 15, + 'pop_size': 10, + 'metric_names': ('f1', 'accuracy'), + 'logging_level': 10, + 'n_jobs': -1, + 'initial_assumption': comparasion_dict, + 'output_folder': r'D:\\WORK\\Repo\\Industiral\\IndustrialTS/benchmark/results/', + 'finetune': True} + +if __name__ == "__main__": + benchmark = BenchmarkTSC(experiment_setup=experiment_setup, + use_small_datasets=True) + benchmark.run() + _ = 1 diff --git a/examples/benchmark_example/time_series_multi_clf_benchmark.py b/examples/real_world_examples/benchmark_example/classification/SOTA_multi.py similarity index 100% rename from examples/benchmark_example/time_series_multi_clf_benchmark.py rename to examples/real_world_examples/benchmark_example/classification/SOTA_multi.py diff --git a/examples/benchmark_example/time_series_uni_clf_benchmark.py b/examples/real_world_examples/benchmark_example/classification/SOTA_uni.py similarity index 100% rename from examples/benchmark_example/time_series_uni_clf_benchmark.py rename to examples/real_world_examples/benchmark_example/classification/SOTA_uni.py diff --git a/examples/real_world_examples/benchmark_example/classification/__init__.py b/examples/real_world_examples/benchmark_example/classification/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/forecasting/__init__.py b/examples/real_world_examples/benchmark_example/forecasting/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/forecasting/time_series_multi_forecast_benchmark.py 
b/examples/real_world_examples/benchmark_example/forecasting/time_series_multi_forecast_benchmark.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/benchmark_example/time_series_uni_forecast_benchmark.py b/examples/real_world_examples/benchmark_example/forecasting/time_series_uni_forecast_benchmark.py similarity index 100% rename from examples/benchmark_example/time_series_uni_forecast_benchmark.py rename to examples/real_world_examples/benchmark_example/forecasting/time_series_uni_forecast_benchmark.py diff --git a/examples/benchmark_example/time_siries_nbeats_forecast_benchmark.py b/examples/real_world_examples/benchmark_example/forecasting/time_siries_nbeats_forecast_benchmark.py similarity index 99% rename from examples/benchmark_example/time_siries_nbeats_forecast_benchmark.py rename to examples/real_world_examples/benchmark_example/forecasting/time_siries_nbeats_forecast_benchmark.py index 014398615..b3d7df168 100644 --- a/examples/benchmark_example/time_siries_nbeats_forecast_benchmark.py +++ b/examples/real_world_examples/benchmark_example/forecasting/time_siries_nbeats_forecast_benchmark.py @@ -1,13 +1,12 @@ import csv -from fedot_ind.core.models.nn.network_impl.nbeats import NBeatsNet - +import matplotlib.pyplot as plt +import numpy as np import torch from torch import optim from torch.nn import functional as F -import matplotlib.pyplot as plt -import numpy as np +from fedot_ind.core.models.nn.network_impl.nbeats import NBeatsNet def get_m4_data(backcast_length, forecast_length, is_training=True): diff --git a/examples/real_world_examples/kaggle/EEG.py b/examples/real_world_examples/benchmark_example/kaggle/EEG.py similarity index 99% rename from examples/real_world_examples/kaggle/EEG.py rename to examples/real_world_examples/benchmark_example/kaggle/EEG.py index 3d40ae1f3..49a33a903 100644 --- a/examples/real_world_examples/kaggle/EEG.py +++ b/examples/real_world_examples/benchmark_example/kaggle/EEG.py @@ -1,18 +1,17 @@ import gc 
import matplotlib +import numpy as np +import pandas as pd from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from scipy.signal import butter, lfilter from sklearn.preprocessing import LabelEncoder - from tqdm import tqdm from benchmark.feature_utils import * from fedot_ind.api.main import FedotIndustrial from fedot_ind.api.utils.path_lib import PROJECT_PATH -from scipy.signal import butter, lfilter from fedot_ind.core.optimizer.IndustrialEvoOptimizer import IndustrialEvoOptimizer -import numpy as np -import pandas as pd matplotlib.use('TkAgg') diff --git a/examples/real_world_examples/kaggle/Harmful Brain Activity Classification.ipynb b/examples/real_world_examples/benchmark_example/kaggle/Harmful Brain Activity Classification.ipynb similarity index 100% rename from examples/real_world_examples/kaggle/Harmful Brain Activity Classification.ipynb rename to examples/real_world_examples/benchmark_example/kaggle/Harmful Brain Activity Classification.ipynb diff --git a/examples/real_world_examples/benchmark_example/kaggle/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/2024-03-04_13-36-19_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/2024-03-04_13-36-19_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/2024-03-04_13-36-19_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/2024-03-04_13-36-19_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/2024-03-04_13-36-19_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/2024-03-04_13-36-19_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/0_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/1_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/2_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/3_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/automl/raf_ensemble/ensemble_head/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/0_pipeline_saved/fitted_operations/__init__.py 
b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/1_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/1_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/1_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/1_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/1_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/1_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/2_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/2_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/2_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/2_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/2_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/2_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/3_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/3_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/3_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/3_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/3_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/3_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/composed/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/composed/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/0_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/0_pipeline_saved/__init__.py new file mode 
100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/1_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/2_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/3_ensemble_branch/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/0_pipeline_saved/__init__.py 
b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/0_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_composed/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/0_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/0_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/0_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/0_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/0_pipeline_saved/preprocessing/__init__.py 
b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/0_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/archive/raf_ensemble/ensemble_head/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_03-41-51_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_03-41-51_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_03-41-51_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_03-41-51_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_03-41-51_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_03-41-51_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_14-21-09_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_14-21-09_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_14-21-09_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_14-21-09_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_14-21-09_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-08_14-21-09_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-12_19-26-25_pipeline_saved/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-12_19-26-25_pipeline_saved/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-12_19-26-25_pipeline_saved/fitted_operations/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-12_19-26-25_pipeline_saved/fitted_operations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-12_19-26-25_pipeline_saved/preprocessing/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/2024-03-12_19-26-25_pipeline_saved/preprocessing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/__init__.py b/examples/real_world_examples/benchmark_example/kaggle/automl/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/benchmark_example/kaggle/automl/log.log b/examples/real_world_examples/benchmark_example/kaggle/automl/log.log new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/kaggle/kaggle_forecasting.py b/examples/real_world_examples/benchmark_example/kaggle/kaggle_forecasting.py similarity index 98% rename from examples/real_world_examples/kaggle/kaggle_forecasting.py rename to examples/real_world_examples/benchmark_example/kaggle/kaggle_forecasting.py index 4444faaca..9c372c4a1 100644 --- 
a/examples/real_world_examples/kaggle/kaggle_forecasting.py +++ b/examples/real_world_examples/benchmark_example/kaggle/kaggle_forecasting.py @@ -68,7 +68,7 @@ def forecasting_loop(dataset_dict): finetune=finetune) ts_dict.update({time_series_id: result_dict}) - test = pd.read_csv("./data/test.csv", parse_dates=["date"]) + test = pd.read_csv("data/test.csv", parse_dates=["date"]) test['orders'] = 0 for warehouse, time_series_id in warehouse_to_item_id.items(): diff --git a/examples/real_world_examples/benchmark_example/regression/PDL_multi.py b/examples/real_world_examples/benchmark_example/regression/PDL_multi.py new file mode 100644 index 000000000..82d3218e8 --- /dev/null +++ b/examples/real_world_examples/benchmark_example/regression/PDL_multi.py @@ -0,0 +1,45 @@ +from benchmark.benchmark_TSER import BenchmarkTSER + +init_assumption_pdl = ['quantile_extractor', 'pdl_reg'] +init_assumption_rf = ['quantile_extractor', 'treg'] +comparasion_dict = dict(pairwise_approach=init_assumption_pdl, + baseline=init_assumption_rf) +experiment_setup = { + 'problem': 'regression', + 'metric': 'rmse', + 'timeout': 2.0, + 'num_of_generations': 15, + 'pop_size': 10, + 'metric_names': ('f1', 'accuracy'), + 'logging_level': 10, + 'n_jobs': -1, + 'initial_assumption': comparasion_dict, + 'finetune': True} +custom_dataset = [ + # 'ElectricMotorTemperature', + # 'PrecipitationAndalusia', + # 'AcousticContaminationMadrid', + # 'WindTurbinePower', + # 'DailyOilGasPrices', + # 'DailyTemperatureLatitude', + # 'LPGasMonitoringHomeActivity', + # 'AluminiumConcentration', + # 'BoronConcentration', + # 'CopperConcentration', + # # 'IronConcentration', + # 'ManganeseConcentration', + # 'SodiumConcentration', + # 'PhosphorusConcentration', + # 'PotassiumConcentration', + 'MagnesiumConcentration', + 'SulphurConcentration', + 'ZincConcentration', + 'CalciumConcentration' +] +custom_dataset = None +if __name__ == "__main__": + benchmark = BenchmarkTSER(experiment_setup=experiment_setup, + 
custom_datasets=custom_dataset + ) + benchmark.run() + _ = 1 diff --git a/examples/benchmark_example/time_series_multi_reg_benchmark.py b/examples/real_world_examples/benchmark_example/regression/SOTA_multi.py similarity index 100% rename from examples/benchmark_example/time_series_multi_reg_benchmark.py rename to examples/real_world_examples/benchmark_example/regression/SOTA_multi.py diff --git a/examples/real_world_examples/benchmark_example/regression/__init__.py b/examples/real_world_examples/benchmark_example/regression/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/industrial_examples/__init__.py b/examples/real_world_examples/industrial_examples/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/industrial_examples/eeg/__init__.py b/examples/real_world_examples/industrial_examples/eeg/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/real_world_examples/eeg/neiry_example.py b/examples/real_world_examples/industrial_examples/eeg/neiry_example.py similarity index 96% rename from examples/real_world_examples/eeg/neiry_example.py rename to examples/real_world_examples/industrial_examples/eeg/neiry_example.py index fc1651948..5ca17bd3f 100644 --- a/examples/real_world_examples/eeg/neiry_example.py +++ b/examples/real_world_examples/industrial_examples/eeg/neiry_example.py @@ -21,8 +21,8 @@ def generate_composite_features(input_data): if __name__ == "__main__": - sig_X = np.load('./sig_data.npy').swapaxes(1, 2) - sig_y = np.load('./sig_target.npy') + sig_X = np.load('sig_data.npy').swapaxes(1, 2) + sig_y = np.load('sig_target.npy') metric_names = ('f1', 'accuracy') scaler = StandardScaler() pca = PCA(.975) diff --git a/examples/real_world_examples/eeg/neiry_industrial_demo.ipynb b/examples/real_world_examples/industrial_examples/eeg/neiry_industrial_demo.ipynb similarity index 100% rename from 
examples/real_world_examples/eeg/neiry_industrial_demo.ipynb rename to examples/real_world_examples/industrial_examples/eeg/neiry_industrial_demo.ipynb diff --git a/examples/real_world_examples/industrial_examples/equipment_monitoring/parma_example.py b/examples/real_world_examples/industrial_examples/equipment_monitoring/parma_example.py new file mode 100644 index 000000000..11625315f --- /dev/null +++ b/examples/real_world_examples/industrial_examples/equipment_monitoring/parma_example.py @@ -0,0 +1,76 @@ +import gc + +import matplotlib +import numpy as np +from sklearn.utils import shuffle + +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate +from fedot_ind.core.operation.transformation.data.park_transformation import park_transform + +matplotlib.use('TkAgg') +gc.collect() +metric_names = ('f1', 'accuracy', 'precision', 'roc_auc') +stat_params = {'window_size': 0, 'stride': 1, 'add_global_features': True, 'use_sliding_window': False} +fourier_params = {'low_rank': 5, 'output_format': 'signal', 'compute_heuristic_representation': True, + 'approximation': 'smooth', 'threshold': 0.9, 'sampling_rate': 64e3} +wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7', 'compute_heuristic_representation': True} +park_params = {} +rocket_params = {"num_features": 200} +sampling_dict = dict(samples=dict(start_idx=0, + end_idx=None), + channels=dict(start_idx=0, + end_idx=None), + elements=dict(start_idx=0, + end_idx=None)) + +feature_generator = { + # 'minirocket': [('minirocket_extractor', rocket_params)], + # 'stat_generator': [('quantile_extractor', stat_params)], + 'fourier': [('fourier_basis', fourier_params)], + 'wavelet': [('wavelet_basis', wavelet_params)], +} + + +def load_data(use_park_transform: bool = False): + train_features, train_target = np.load('./dataset/X_train.npy').swapaxes(1, 2), np.load('./dataset/y_train.npy') + test_features, test_target = np.load('./dataset/X_test.npy').swapaxes(1, 2), np.load('./dataset/y_test.npy') 
+ train_features, train_target = shuffle(train_features, train_target) + if use_park_transform: + train_features, test_features = park_transform(train_features), park_transform(test_features) + input_train = (train_features, train_target) + input_test = (test_features, test_target) + + dataset = dict(test_data=input_test, train_data=input_train) + return dataset + + +if __name__ == "__main__": + finetune = False + dataset = load_data(use_park_transform=True) + api_config = dict(problem='classification', + metric='f1', + timeout=40, + pop_size=10, + early_stopping_iterations=10, + early_stopping_timeout=30, + optimizer_params={'mutation_agent': 'random', + 'mutation_strategy': 'params_mutation_strategy'}, + with_tunig=False, + preset='classification_tabular', + industrial_strategy_params={'feature_generator': feature_generator, + 'data_type': 'tensor', + 'learning_strategy': 'ts2tabular', + 'sampling_strategy': sampling_dict + }, + n_jobs=-1, + logging_level=20) + + result_dict = ApiTemplate(api_config=api_config, + metric_list=metric_names).eval(dataset=dataset, + finetune=finetune) + metrics = result_dict['metrics'] + metrics.to_csv('./metrics.csv') + hist = result_dict['industrial_model'].save_optimization_history(return_history=True) + result_dict['industrial_model'].vis_optimisation_history(hist) + result_dict['industrial_model'].save_best_model() + _ = 1 diff --git a/examples/benchmark_example/LoRA notebooks/1_LoRA_Implementation.ipynb b/examples/tutorial/LoRA notebooks/1_LoRA_Implementation.ipynb similarity index 100% rename from examples/benchmark_example/LoRA notebooks/1_LoRA_Implementation.ipynb rename to examples/tutorial/LoRA notebooks/1_LoRA_Implementation.ipynb diff --git a/examples/benchmark_example/LoRA notebooks/2_LoRA_NBEATS_Metrics.ipynb b/examples/tutorial/LoRA notebooks/2_LoRA_NBEATS_Metrics.ipynb similarity index 100% rename from examples/benchmark_example/LoRA notebooks/2_LoRA_NBEATS_Metrics.ipynb rename to examples/tutorial/LoRA 
notebooks/2_LoRA_NBEATS_Metrics.ipynb diff --git a/examples/benchmark_example/LoRA notebooks/3_LoRA_Transformer_Metrics.ipynb b/examples/tutorial/LoRA notebooks/3_LoRA_Transformer_Metrics.ipynb similarity index 100% rename from examples/benchmark_example/LoRA notebooks/3_LoRA_Transformer_Metrics.ipynb rename to examples/tutorial/LoRA notebooks/3_LoRA_Transformer_Metrics.ipynb diff --git a/examples/benchmark_example/LoRA notebooks/4_LoRA_rSVD.ipynb b/examples/tutorial/LoRA notebooks/4_LoRA_rSVD.ipynb similarity index 100% rename from examples/benchmark_example/LoRA notebooks/4_LoRA_rSVD.ipynb rename to examples/tutorial/LoRA notebooks/4_LoRA_rSVD.ipynb diff --git a/examples/benchmark_example/LoRA notebooks/lora_nbeats.py b/examples/tutorial/LoRA notebooks/lora_nbeats.py similarity index 100% rename from examples/benchmark_example/LoRA notebooks/lora_nbeats.py rename to examples/tutorial/LoRA notebooks/lora_nbeats.py index 55014c823..71ae63cc8 100644 --- a/examples/benchmark_example/LoRA notebooks/lora_nbeats.py +++ b/examples/tutorial/LoRA notebooks/lora_nbeats.py @@ -1,8 +1,8 @@ import torch +import torch.nn as nn import torchvision.datasets as datasets import torchvision.transforms as v2 -import torch.nn as nn def load_mnist_data(): diff --git a/examples/tutorial/time_series/ts_classification/classification_example.ipynb b/examples/tutorial/time_series/ts_classification/classification_example.ipynb deleted file mode 100644 index 1b2c522a0..000000000 --- a/examples/tutorial/time_series/ts_classification/classification_example.ipynb +++ /dev/null @@ -1,782 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 120, - "metadata": { - "collapsed": true, - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "\n", - "from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline, ApiTemplate" - ] - }, - { - "cell_type": 
"code", - "execution_count": 148, - "outputs": [], - "source": [ - "def plot_mean_sample(X,y, labels:list = [], n_channel: int = 1):\n", - " mean_sample = []\n", - " if len(labels) == 0:\n", - " labels = list(np.unique(y))\n", - " for label in labels:\n", - " mean_sample.append(np.mean(X[y == label] , axis=0)) # Данные класса 1\n", - " ax = plt.gca()\n", - " channels = [f'Channel {x}' for x in range(n_channel)]\n", - " df = pd.DataFrame(mean_sample).T\n", - " df.columns = labels\n", - " df.plot(kind ='line',ax=ax)\n", - " plt.legend(fontsize='small')\n", - " plt.legend(loc='upper left', bbox_to_anchor=(1, 1))\n", - " plt.show()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 149, - "outputs": [], - "source": [ - "def plot_mean_sample_multi(X,y, labels:list = [], n_channel: int = None):\n", - " mean_sample = {}\n", - " if len(labels) == 0:\n", - " labels = list(np.unique(y))\n", - " if n_channel is None:\n", - " n_channel = X.shape[1]\n", - " channels = [f'Channel {x}' for x in range(n_channel)]\n", - " for label in labels:\n", - " mask = y == label\n", - " for chn in range(n_channel):\n", - " mean_sample.update({f'Label_{label}_channel_{chn}':np.mean(X[mask.flatten(),chn,:] , axis=0)}) # Данные класса 1\n", - " ax = plt.gca()\n", - " df = pd.DataFrame(mean_sample)\n", - " df.plot(kind ='line', ax=ax)\n", - " plt.suptitle('Усреднённые семплы по классам')\n", - " plt.legend(fontsize='small')\n", - " plt.legend(loc='upper left', bbox_to_anchor=(1, 1))\n", - " plt.show()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 140, - "outputs": [], - "source": [ - "finetune = False\n", - "metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')\n", - "api_config = dict(problem='classification',\n", - " metric='accuracy',\n", - " timeout=1,\n", - " pop_size=10,\n", - " with_tunig=False,\n", - " 
n_jobs=2,\n", - " logging_level=20)\n", - "pipeline_creator = AbstractPipeline(task='classification')" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "# Our datasets and models for experiments" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 152, - "outputs": [], - "source": [ - "easy_to_clf_uno = 'ItalyPowerDemand'\n", - "hard_to_clf_uno = 'ElectricDevices'\n", - "easy_to_clf_multi = 'BasicMotions'\n", - "hard_to_clf_multi = 'AtrialFibrillation'\n", - "node_list_model = ['quantile_extractor','logit']" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "# Our datasets" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 153, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-14 17:24:08,554 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\ItalyPowerDemand\n", - "2024-10-14 17:24:08,571 - Data read successfully from local folder\n", - "2024-10-14 17:24:08,573 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\ElectricDevices\n", - "2024-10-14 17:24:09,029 - Data read successfully from local folder\n", - "2024-10-14 17:24:09,061 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\BasicMotions\n", - "2024-10-14 17:24:09,143 - Data read successfully from local folder\n", - "2024-10-14 17:24:09,150 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\AtrialFibrillation\n", - "2024-10-14 17:24:09,175 - Data read successfully from local folder\n" - ] - } - ], - "source": [ - "easy_to_clf_uno_dataset = pipeline_creator.create_input_data(easy_to_clf_uno)\n", - 
"hard_to_clf_uno_dataset = pipeline_creator.create_input_data(hard_to_clf_uno)\n", - "easy_to_clf_multi_dataset = pipeline_creator.create_input_data(easy_to_clf_multi)\n", - "hard_to_clf_multi_dataset = pipeline_creator.create_input_data(hard_to_clf_multi)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "# Lets Visualise our data" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Easy to clf data" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 154, - "outputs": [ - { - "data": { - "text/plain": "
", - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_mean_sample(easy_to_clf_uno_dataset[0].features,easy_to_clf_uno_dataset[0].target)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "## Hard to clf data" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 155, - "outputs": [ - { - "data": { - "text/plain": "
", - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_mean_sample(hard_to_clf_uno_dataset[0].features,hard_to_clf_uno_dataset[0].target)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 145, - "outputs": [ - { - "data": { - "text/plain": "
", - "image/png": "" - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plot_mean_sample_multi(easy_to_clf_multi_dataset[0].features,easy_to_clf_multi_dataset[0].target)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "# Transform initial row in feature vector. Easy dataset" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 131, - "outputs": [], - "source": [ - "stat_pipeline = pipeline_creator.create_pipeline(node_list_model)\n", - "feature_extractor = pipeline_creator.create_pipeline(['quantile_extractor'])\n", - "feature_matrix = feature_extractor.fit(easy_to_clf_uno_dataset[0])\n", - "initial_ts, transformed_ts = pd.DataFrame(feature_matrix.features.squeeze()),pd.DataFrame(feature_matrix.predict.squeeze())\n", - "transformed_ts['target'] = feature_matrix.target" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "node_dict = {'quantile_extractor':{'window_size':10,\n", - " 'stride':50}}" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 132, - "outputs": [ - { - "data": { - "text/plain": " 0 1 2 3 4 5 6 \\\ntarget \n1.0 0.199062 -1.086262 3.0 0.074049 0.882922 1.441785 0.958333 \n2.0 -0.306763 -1.338495 4.0 0.077907 0.776345 1.698066 0.958333 \n\n 7 8 9 ... 18 19 20 \\\ntarget ... \n1.0 0.166667 0.810975 4.220176 ... 0.743115 -1.850372e-17 -0.203329 \n2.0 0.166667 0.915196 4.251629 ... 0.704191 -4.166667e-10 0.154370 \n\n 21 22 23 24 25 26 27 \ntarget \n1.0 0.978945 1.732495 -1.493878 -1.435400 -0.606625 0.835160 1.409857 \n2.0 0.978945 1.330520 -1.521644 -1.508412 -0.882112 0.815954 1.292295 \n\n[2 rows x 28 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0123456789...18192021222324252627
target
1.00.199062-1.0862623.00.0740490.8829221.4417850.9583330.1666670.8109754.220176...0.743115-1.850372e-17-0.2033290.9789451.732495-1.493878-1.435400-0.6066250.8351601.409857
2.0-0.306763-1.3384954.00.0779070.7763451.6980660.9583330.1666670.9151964.251629...0.704191-4.166667e-100.1543700.9789451.330520-1.521644-1.508412-0.8821120.8159541.292295
\n

2 rows × 28 columns

\n
" - }, - "execution_count": 132, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "transformed_ts.groupby(by='target').first()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 146, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-14 17:12:42,894 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\ItalyPowerDemand\n", - "2024-10-14 17:12:42,904 - Data read successfully from local folder\n", - "2024-10-14 17:12:42,909 - Initialising experiment setup\n", - "2024-10-14 17:12:42,910 - -------------------------------------------------\n", - "2024-10-14 17:12:42,911 - Initialising Industrial Repository\n", - "2024-10-14 17:12:42,912 - -------------------------------------------------\n", - "2024-10-14 17:12:42,912 - Initialising Dask Server\n", - "Creating Dask Server\n", - "2024-10-14 17:12:42,922 - State start\n", - "2024-10-14 17:12:42,934 - Scheduler at: inproc://10.64.4.172/17848/75\n", - "2024-10-14 17:12:42,935 - dashboard at: http://10.64.4.172:60191/status\n", - "2024-10-14 17:12:42,936 - Registering Worker plugin shuffle\n", - "2024-10-14 17:12:42,953 - Start worker at: inproc://10.64.4.172/17848/78\n", - "2024-10-14 17:12:42,957 - Listening to: inproc10.64.4.172\n", - "2024-10-14 17:12:42,958 - Worker name: 0\n", - "2024-10-14 17:12:42,959 - dashboard at: 10.64.4.172:60192\n", - "2024-10-14 17:12:42,959 - Waiting to connect to: inproc://10.64.4.172/17848/75\n", - "2024-10-14 17:12:42,960 - -------------------------------------------------\n", - "2024-10-14 17:12:42,961 - Threads: 8\n", - "2024-10-14 17:12:42,962 - Memory: 31.95 GiB\n", - "2024-10-14 17:12:42,963 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-in7kt88h\n", - "2024-10-14 17:12:42,964 - -------------------------------------------------\n", - "2024-10-14 17:12:42,968 - Register 
worker \n", - "2024-10-14 17:12:42,973 - Starting worker compute stream, inproc://10.64.4.172/17848/78\n", - "2024-10-14 17:12:42,974 - Starting established connection to inproc://10.64.4.172/17848/79\n", - "2024-10-14 17:12:42,976 - Starting Worker plugin shuffle\n", - "2024-10-14 17:12:42,977 - Registered to: inproc://10.64.4.172/17848/75\n", - "2024-10-14 17:12:42,978 - -------------------------------------------------\n", - "2024-10-14 17:12:42,980 - Starting established connection to inproc://10.64.4.172/17848/75\n", - "2024-10-14 17:12:42,985 - Receive client connection: Client-60de385f-8a36-11ef-85b8-b42e99a00ea1\n", - "2024-10-14 17:12:42,990 - Starting established connection to inproc://10.64.4.172/17848/80\n", - "2024-10-14 17:12:42,992 - LinK Dask Server - http://10.64.4.172:60191/status\n", - "2024-10-14 17:12:42,994 - -------------------------------------------------\n", - "2024-10-14 17:12:42,994 - Initialising solver\n", - "AssumptionsHandler - Initial pipeline fitting started\n", - "AssumptionsHandler - Initial pipeline was fitted successfully\n", - "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 0.3 MiB, max: 0.4 MiB\n", - "ApiComposer - Initial pipeline was fitted in 0.5 sec.\n", - "AssumptionsHandler - Preset was changed to best_quality due to fit time estimation for initial model.\n", - "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", - "ApiComposer - Pipeline composition started.\n", - "DataSourceSplitter - K-folds cross validation is applied.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generations: 0%| | 0/10000 [00:00 on (/n_quantile_extractor_{'window_size': 45, 'stride': 1, 'add_global_features': False};)/n_logit\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'window_size': 35, 'stride': 9, 'add_global_features': True};)/n_logit_{'C': 0.08018172582949566, 'penalty': 'l1', 'solver': 'liblinear'}\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception on (/n_scaling;)/n_logit\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'window_size': 25, 'stride': 3, 'add_global_features': False};)/n_logit_{'C': 0.4647070387880487, 'penalty': 'l2', 'solver': 'liblinear'}\n", - "IndustrialDispatcher - 2 individuals out of 13 in previous population were evaluated successfully. 
0.15384615384615385% is a fairly small percentage of successful evaluation.\n", - "IndustrialEvoOptimizer - Generation num: 1 size: 2\n", - "IndustrialEvoOptimizer - Best individuals: HallOfFame archive fitness (1): ['']\n", - "GroupedCondition - Optimisation stopped: Time limit is reached\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Generations: 0%| | 0/10000 [00:43']\n", - "IndustrialEvoOptimizer - no improvements for 1 iterations\n", - "IndustrialEvoOptimizer - spent time: 0.7 min\n", - "GPComposer - GP composition finished\n", - "DataSourceSplitter - K-folds cross validation is applied.\n", - "ApiComposer - Hyperparameters tuning started with 0 min. timeout\n", - "SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial graph\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SimultaneousTuner - Initial graph: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", - "logit - {'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - "quantile_extractor - {} \n", - "Initial metric: [0.792]\n", - " 0%| | 0/10 [00:00 on (/n_quantile_extractor_{'add_global_features': False, 'stride': 8, 'window_size': 5};)/n_logit_{'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - " 10%|█ | 1/10 [00:00<00:03, 2.43trial/s, best loss: inf]2024-10-14 17:13:28,918 - build_posterior_wrapper took 0.009996 seconds\n", - "2024-10-14 17:13:28,924 - TPE using 1/1 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. 
Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': True, 'stride': 5, 'window_size': 35};)/n_logit_{'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - " 20%|██ | 2/10 [00:08<00:40, 5.06s/trial, best loss: inf]2024-10-14 17:13:37,233 - build_posterior_wrapper took 0.012998 seconds\n", - "2024-10-14 17:13:37,241 - TPE using 2/2 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 8, 'window_size': 35};)/n_logit_{'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - " 30%|███ | 3/10 [00:17<00:39, 5.67s/trial, best loss: inf]\n", - "SimultaneousTuner - Tunner stopped after initial search due to the lack of time\n", - "SimultaneousTuner - Hyperparameters optimization finished\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 8, 'window_size': 5};)/n_logit_{'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - "SimultaneousTuner - Return init graph due to the fact that obtained metric is None. 
Initial metric is 0.793\n", - "SimultaneousTuner - Final graph: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", - "logit - {'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - "quantile_extractor - {}\n", - "SimultaneousTuner - Final metric: 0.792\n", - "ApiComposer - Hyperparameters tuning finished\n", - "ApiComposer - Model generation finished\n", - "FEDOT logger - Final pipeline was fitted\n", - "FEDOT logger - Final pipeline: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", - "logit - {'C': 0.9004533434023936, 'penalty': 'l2', 'solver': 'liblinear'}\n", - "quantile_extractor - {}\n", - "MemoryAnalytics - Memory consumption for finish in main session: current 18.7 MiB, max: 19.3 MiB\n", - "FEDOT logger - Predictions was saved in current directory.\n", - "FEDOT logger - Predictions was saved in current directory.\n" - ] - } - ], - "source": [ - "result_dict = ApiTemplate(api_config=api_config,\n", - " metric_list=metric_names).eval(dataset='ItalyPowerDemand',\n", - " finetune=finetune,\n", - " initial_assumption = node_list_model)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 147, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " accuracy f1 precision\n", - "0 0.723 0.742 0.728\n" - ] - } - ], - "source": [ - "print(result_dict['metrics'])" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "markdown", - "source": [ - "# Transform initial row in feature vector. 
Hard dataset" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%% md\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 156, - "outputs": [], - "source": [ - "stat_pipeline = pipeline_creator.create_pipeline(node_list_model)\n", - "feature_extractor = pipeline_creator.create_pipeline(['quantile_extractor'])\n", - "feature_matrix = feature_extractor.fit(hard_to_clf_uno_dataset[0])\n", - "initial_ts, transformed_ts = pd.DataFrame(feature_matrix.features.squeeze()),pd.DataFrame(feature_matrix.predict.squeeze())\n", - "transformed_ts['target'] = feature_matrix.target" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 160, - "outputs": [ - { - "data": { - "text/plain": " 0 1 2 3 4 5 6 \\\n0 4.781661 21.857035 2.0 0.004143 0.303948 0.000000 0.989583 \n1 6.982644 51.614110 1.0 0.004286 0.862304 0.000000 0.989583 \n2 0.631072 -1.492669 10.0 -0.000874 0.217731 2.014866 0.989583 \n3 2.141406 3.358323 6.0 0.018356 0.034031 0.000000 0.989583 \n4 8.295791 72.098259 1.0 -0.002390 0.864106 0.000000 0.989583 \n... ... ... ... ... ... ... ... \n8921 5.898298 34.676828 2.0 0.007734 0.866926 0.000000 0.989583 \n8922 0.135169 -0.445727 33.0 0.009239 0.517216 1.597863 0.989583 \n8923 -1.173111 -0.637535 23.0 -0.000499 0.140819 0.574335 0.989583 \n8924 6.817212 49.579918 2.0 0.005960 0.859749 0.000000 0.989583 \n8925 5.778912 34.677022 19.0 0.011177 0.631451 0.157468 0.989583 \n\n 7 8 9 ... 19 20 21 \\\n0 0.041667 0.503727 0.498850 ... -1.770833e-09 -0.213938 0.994778 \n1 0.020833 0.619377 0.333216 ... 1.145833e-09 -0.166598 0.994778 \n2 0.218750 0.544344 2.323477 ... 3.333333e-09 -0.783021 0.994778 \n3 0.125000 0.768398 1.151543 ... -5.000000e-09 -0.435438 0.994778 \n4 0.020833 0.333090 0.166928 ... -3.125000e-09 -0.133151 0.994778 \n... ... ... ... ... ... ... ... \n8921 0.020833 0.670338 0.416115 ... 
-1.135417e-09 -0.180066 0.994778 \n8922 0.458333 -0.037192 4.296933 ... -8.322917e-09 0.094318 0.994778 \n8923 0.489583 -0.333333 0.811278 ... -5.000000e-09 0.574335 0.994778 \n8924 0.020833 0.561447 0.498850 ... 7.291667e-10 -0.185983 0.994778 \n8925 0.020833 0.551156 3.885481 ... 9.166668e-11 -0.245496 0.994778 \n\n 22 23 24 25 26 27 target \n0 5.585047 -0.213938 -0.213938 -2.139382e-01 -0.213938 0.154251 1.0 \n1 8.131040 -0.166598 -0.166598 -1.665976e-01 -0.166598 -0.166598 4.0 \n2 1.696814 -0.783021 -0.783021 -7.830209e-01 1.231845 1.580572 2.0 \n3 3.662803 -0.435438 -0.435438 -4.354381e-01 -0.435438 2.125963 5.0 \n4 8.997227 -0.133151 -0.133151 -1.331514e-01 -0.133151 -0.133151 3.0 \n... ... ... ... ... ... ... ... \n8921 6.532028 -0.180066 -0.180066 -1.800663e-01 -0.180066 -0.117773 4.0 \n8922 2.757423 -1.858625 -1.503544 -7.933832e-01 0.804479 1.559026 4.0 \n8923 0.574335 -1.723006 -1.723006 -5.000000e-09 0.574335 0.574335 5.0 \n8924 8.019215 -0.185983 -0.185983 -1.859831e-01 -0.185983 0.676157 4.0 \n8925 6.436382 -0.303235 -0.274366 -2.717411e-01 -0.114273 0.452610 4.0 \n\n[8926 rows x 29 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0123456789...192021222324252627target
04.78166121.8570352.00.0041430.3039480.0000000.9895830.0416670.5037270.498850...-1.770833e-09-0.2139380.9947785.585047-0.213938-0.213938-2.139382e-01-0.2139380.1542511.0
16.98264451.6141101.00.0042860.8623040.0000000.9895830.0208330.6193770.333216...1.145833e-09-0.1665980.9947788.131040-0.166598-0.166598-1.665976e-01-0.166598-0.1665984.0
20.631072-1.49266910.0-0.0008740.2177312.0148660.9895830.2187500.5443442.323477...3.333333e-09-0.7830210.9947781.696814-0.783021-0.783021-7.830209e-011.2318451.5805722.0
32.1414063.3583236.00.0183560.0340310.0000000.9895830.1250000.7683981.151543...-5.000000e-09-0.4354380.9947783.662803-0.435438-0.435438-4.354381e-01-0.4354382.1259635.0
48.29579172.0982591.0-0.0023900.8641060.0000000.9895830.0208330.3330900.166928...-3.125000e-09-0.1331510.9947788.997227-0.133151-0.133151-1.331514e-01-0.133151-0.1331513.0
..................................................................
89215.89829834.6768282.00.0077340.8669260.0000000.9895830.0208330.6703380.416115...-1.135417e-09-0.1800660.9947786.532028-0.180066-0.180066-1.800663e-01-0.180066-0.1177734.0
89220.135169-0.44572733.00.0092390.5172161.5978630.9895830.458333-0.0371924.296933...-8.322917e-090.0943180.9947782.757423-1.858625-1.503544-7.933832e-010.8044791.5590264.0
8923-1.173111-0.63753523.0-0.0004990.1408190.5743350.9895830.489583-0.3333330.811278...-5.000000e-090.5743350.9947780.574335-1.723006-1.723006-5.000000e-090.5743350.5743355.0
89246.81721249.5799182.00.0059600.8597490.0000000.9895830.0208330.5614470.498850...7.291667e-10-0.1859830.9947788.019215-0.185983-0.185983-1.859831e-01-0.1859830.6761574.0
89255.77891234.67702219.00.0111770.6314510.1574680.9895830.0208330.5511563.885481...9.166668e-11-0.2454960.9947786.436382-0.303235-0.274366-2.717411e-01-0.1142730.4526104.0
\n

8926 rows × 29 columns

\n
" - }, - "execution_count": 160, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "transformed_ts" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 157, - "outputs": [ - { - "data": { - "text/plain": " 0 1 2 3 4 5 6 \\\ntarget \n1.0 4.781661 21.857035 2.0 0.004143 0.303948 0.000000 0.989583 \n2.0 0.631072 -1.492669 10.0 -0.000874 0.217731 2.014866 0.989583 \n3.0 8.295791 72.098259 1.0 -0.002390 0.864106 0.000000 0.989583 \n4.0 6.982644 51.614110 1.0 0.004286 0.862304 0.000000 0.989583 \n5.0 2.141406 3.358323 6.0 0.018356 0.034031 0.000000 0.989583 \n6.0 6.900998 51.500275 2.0 0.000755 0.874662 0.000000 0.989583 \n7.0 0.113258 -1.039021 18.0 0.011574 0.772800 1.886971 0.989583 \n\n 7 8 9 ... 18 19 20 \\\ntarget ... \n1.0 0.041667 0.503727 0.498850 ... 0.885914 -1.770833e-09 -0.213938 \n2.0 0.218750 0.544344 2.323477 ... 0.749433 3.333333e-09 -0.783021 \n3.0 0.020833 0.333090 0.166928 ... 0.931344 -3.125000e-09 -0.133151 \n4.0 0.020833 0.619377 0.333216 ... 0.931344 1.145833e-09 -0.166598 \n5.0 0.125000 0.768398 1.151543 ... 0.730208 -5.000000e-09 -0.435438 \n6.0 0.041667 0.272347 0.725212 ... 0.885914 -1.510417e-09 -0.189545 \n7.0 0.229167 0.777667 3.279687 ... 0.634906 7.458333e-09 0.493963 \n\n 21 22 23 24 25 26 27 \ntarget \n1.0 0.994778 5.585047 -0.213938 -0.213938 -0.213938 -0.213938 0.154251 \n2.0 0.994778 1.696814 -0.783021 -0.783021 -0.783021 1.231845 1.580572 \n3.0 0.994778 8.997227 -0.133151 -0.133151 -0.133151 -0.133151 -0.133151 \n4.0 0.994778 8.131040 -0.166598 -0.166598 -0.166598 -0.166598 -0.166598 \n5.0 0.994778 3.662803 -0.435438 -0.435438 -0.435438 -0.435438 2.125963 \n6.0 0.994778 8.182152 -0.189545 -0.189545 -0.189545 -0.189545 0.277471 \n7.0 0.994778 2.627060 -1.146882 -1.146882 -1.146882 0.740089 1.416937 \n\n[7 rows x 28 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0123456789...18192021222324252627
target
1.04.78166121.8570352.00.0041430.3039480.0000000.9895830.0416670.5037270.498850...0.885914-1.770833e-09-0.2139380.9947785.585047-0.213938-0.213938-0.213938-0.2139380.154251
2.00.631072-1.49266910.0-0.0008740.2177312.0148660.9895830.2187500.5443442.323477...0.7494333.333333e-09-0.7830210.9947781.696814-0.783021-0.783021-0.7830211.2318451.580572
3.08.29579172.0982591.0-0.0023900.8641060.0000000.9895830.0208330.3330900.166928...0.931344-3.125000e-09-0.1331510.9947788.997227-0.133151-0.133151-0.133151-0.133151-0.133151
4.06.98264451.6141101.00.0042860.8623040.0000000.9895830.0208330.6193770.333216...0.9313441.145833e-09-0.1665980.9947788.131040-0.166598-0.166598-0.166598-0.166598-0.166598
5.02.1414063.3583236.00.0183560.0340310.0000000.9895830.1250000.7683981.151543...0.730208-5.000000e-09-0.4354380.9947783.662803-0.435438-0.435438-0.435438-0.4354382.125963
6.06.90099851.5002752.00.0007550.8746620.0000000.9895830.0416670.2723470.725212...0.885914-1.510417e-09-0.1895450.9947788.182152-0.189545-0.189545-0.189545-0.1895450.277471
7.00.113258-1.03902118.00.0115740.7728001.8869710.9895830.2291670.7776673.279687...0.6349067.458333e-090.4939630.9947782.627060-1.146882-1.146882-1.1468820.7400891.416937
\n

7 rows × 28 columns

\n
" - }, - "execution_count": 157, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "transformed_ts.groupby(by='target').first()" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 158, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-10-14 17:24:46,814 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\Phoneme\n", - "2024-10-14 17:24:47,323 - Data read successfully from local folder\n", - "2024-10-14 17:24:47,332 - Initialising experiment setup\n", - "2024-10-14 17:24:47,337 - -------------------------------------------------\n", - "2024-10-14 17:24:47,338 - Initialising Industrial Repository\n", - "2024-10-14 17:24:47,339 - -------------------------------------------------\n", - "2024-10-14 17:24:47,339 - Initialising Dask Server\n", - "Creating Dask Server\n", - "2024-10-14 17:24:47,351 - State start\n", - "2024-10-14 17:24:47,372 - Scheduler at: inproc://10.64.4.172/17848/87\n", - "2024-10-14 17:24:47,374 - dashboard at: http://10.64.4.172:61112/status\n", - "2024-10-14 17:24:47,376 - Registering Worker plugin shuffle\n", - "2024-10-14 17:24:47,396 - Start worker at: inproc://10.64.4.172/17848/90\n", - "2024-10-14 17:24:47,397 - Listening to: inproc10.64.4.172\n", - "2024-10-14 17:24:47,398 - Worker name: 0\n", - "2024-10-14 17:24:47,399 - dashboard at: 10.64.4.172:61113\n", - "2024-10-14 17:24:47,400 - Waiting to connect to: inproc://10.64.4.172/17848/87\n", - "2024-10-14 17:24:47,401 - -------------------------------------------------\n", - "2024-10-14 17:24:47,402 - Threads: 8\n", - "2024-10-14 17:24:47,403 - Memory: 31.95 GiB\n", - "2024-10-14 17:24:47,405 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-70jkow7u\n", - "2024-10-14 17:24:47,406 - -------------------------------------------------\n", - "2024-10-14 17:24:47,413 - Register worker \n", 
- "2024-10-14 17:24:47,416 - Starting worker compute stream, inproc://10.64.4.172/17848/90\n", - "2024-10-14 17:24:47,417 - Starting established connection to inproc://10.64.4.172/17848/91\n", - "2024-10-14 17:24:47,424 - Starting Worker plugin shuffle\n", - "2024-10-14 17:24:47,428 - Registered to: inproc://10.64.4.172/17848/87\n", - "2024-10-14 17:24:47,429 - -------------------------------------------------\n", - "2024-10-14 17:24:47,431 - Starting established connection to inproc://10.64.4.172/17848/87\n", - "2024-10-14 17:24:47,439 - Receive client connection: Client-10acbaa4-8a38-11ef-85b8-b42e99a00ea1\n", - "2024-10-14 17:24:47,443 - Starting established connection to inproc://10.64.4.172/17848/92\n", - "2024-10-14 17:24:47,445 - LinK Dask Server - http://10.64.4.172:61112/status\n", - "2024-10-14 17:24:47,447 - -------------------------------------------------\n", - "2024-10-14 17:24:47,448 - Initialising solver\n", - "AssumptionsHandler - Initial pipeline fitting started\n", - "AssumptionsHandler - Initial pipeline was fitted successfully\n", - "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 4.6 MiB, max: 6.8 MiB\n", - "ApiComposer - Initial pipeline was fitted in 2.5 sec.\n", - "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", - "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", - "ApiComposer - Timeout is too small for composing and is skipped because fit_time is 2.485536 sec.\n", - "DataSourceSplitter - Hold out validation is applied.\n", - "ApiComposer - Hyperparameters tuning started with 1 min. timeout\n", - "SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial graph\n", - "SimultaneousTuner - Initial graph: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", - "logit - {}\n", - "quantile_extractor - {} \n", - "Initial metric: [0.083]\n", - " 0%| | 0/100000 [00:00 on (/n_quantile_extractor_{'add_global_features': True, 'stride': 8, 'window_size': 5};)/n_logit_{'C': 7.013310954134132, 'penalty': 'l1', 'solver': 'liblinear'}\n", - " 0%| | 1/100000 [00:00<13:58:19, 1.99trial/s, best loss: inf]2024-10-14 17:24:53,635 - build_posterior_wrapper took 0.010000 seconds\n", - "2024-10-14 17:24:53,647 - TPE using 1/1 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': True, 'stride': 8, 'window_size': 30};)/n_logit_{'C': 4.629539905077404, 'penalty': 'l1', 'solver': 'liblinear'}\n", - " 0%| | 2/100000 [00:09<150:53:16, 5.43s/trial, best loss: inf]2024-10-14 17:25:02,519 - build_posterior_wrapper took 0.010000 seconds\n", - "2024-10-14 17:25:02,523 - TPE using 2/2 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. 
Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 3, 'window_size': 25};)/n_logit_{'C': 1.4623270887022872, 'penalty': 'l2', 'solver': 'liblinear'}\n", - " 0%| | 3/100000 [00:18<191:34:27, 6.90s/trial, best loss: inf]2024-10-14 17:25:11,161 - build_posterior_wrapper took 0.011999 seconds\n", - "2024-10-14 17:25:11,171 - TPE using 3/3 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 5, 'window_size': 45};)/n_logit_{'C': 0.9861086567698546, 'penalty': 'l1', 'solver': 'liblinear'}\n", - " 0%| | 4/100000 [00:27<215:40:41, 7.76s/trial, best loss: inf]2024-10-14 17:25:20,255 - build_posterior_wrapper took 0.010998 seconds\n", - "2024-10-14 17:25:20,272 - TPE using 4/4 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 7, 'window_size': 15};)/n_logit_{'C': 7.162224422667747, 'penalty': 'l2', 'solver': 'liblinear'}\n", - " 0%| | 5/100000 [00:36<232:48:32, 8.38s/trial, best loss: inf]2024-10-14 17:25:29,728 - build_posterior_wrapper took 0.010000 seconds\n", - "2024-10-14 17:25:29,734 - TPE using 5/5 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. 
Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 3, 'window_size': 35};)/n_logit_{'C': 5.358362329054688, 'penalty': 'l1', 'solver': 'liblinear'}\n", - " 0%| | 6/100000 [00:46<245:06:36, 8.82s/trial, best loss: inf]2024-10-14 17:25:39,416 - build_posterior_wrapper took 0.010999 seconds\n", - "2024-10-14 17:25:39,446 - TPE using 6/6 trials with best loss inf\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': False, 'stride': 4, 'window_size': 10};)/n_logit_{'C': 5.481147127254816, 'penalty': 'l2', 'solver': 'liblinear'}\n", - " 0%| | 7/100000 [00:57<228:34:40, 8.23s/trial, best loss: inf]\n", - "SimultaneousTuner - Hyperparameters optimization finished\n", - "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception <'list' object has no attribute 'supplementary_data'> on (/n_quantile_extractor_{'add_global_features': True, 'stride': 8, 'window_size': 5};)/n_logit_{'C': 7.013310954134132, 'penalty': 'l1', 'solver': 'liblinear'}\n", - "SimultaneousTuner - Return init graph due to the fact that obtained metric is None. 
Initial metric is 0.083\n", - "SimultaneousTuner - Final graph: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", - "logit - {}\n", - "quantile_extractor - {}\n", - "SimultaneousTuner - Final metric: 0.083\n", - "ApiComposer - Hyperparameters tuning finished\n", - "ApiComposer - Model generation finished\n", - "FEDOT logger - Final pipeline was fitted\n", - "FEDOT logger - Final pipeline: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", - "logit - {}\n", - "quantile_extractor - {}\n", - "MemoryAnalytics - Memory consumption for finish in main session: current 9.2 MiB, max: 9.8 MiB\n", - "FEDOT logger - Predictions was saved in current directory.\n", - "FEDOT logger - Predictions was saved in current directory.\n" - ] - } - ], - "source": [ - "result_dict = ApiTemplate(api_config=api_config,\n", - " metric_list=metric_names).eval(dataset='Phoneme',\n", - " finetune=finetune,\n", - " initial_assumption = node_list_model)" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": 159, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " accuracy f1 precision\n", - "0 0.262 0.21 0.148\n" - ] - } - ], - "source": [ - "print(result_dict['metrics'])" - ], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [], - "metadata": { - "collapsed": false, - "pycharm": { - "name": "#%%\n" - } - } - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline 
at end of file diff --git a/examples/tutorial/time_series/ts_classification/classification_example_advanced.ipynb b/examples/tutorial/time_series/ts_classification/classification_example_advanced.ipynb new file mode 100644 index 000000000..4d1dd3ec0 --- /dev/null +++ b/examples/tutorial/time_series/ts_classification/classification_example_advanced.ipynb @@ -0,0 +1,1542 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from hyperopt import hp\n", + "from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline, ApiTemplate\n", + "from pathlib import Path\n", + "from fedot.core.data.data import InputData\n", + "from fedot.core.data.data_split import train_test_data_setup\n", + "from fedot.core.pipelines.pipeline_builder import PipelineBuilder\n", + "from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum\n", + "from matplotlib import pyplot as plt\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n", + "from fedot_ind.api.utils.path_lib import PROJECT_PATH\n", + "from fedot_ind.core.architecture.settings.computational import backend_methods as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "def plot_mean_sample(X,y, labels:list = [], n_channel: int = 1):\n", + " mean_sample = []\n", + " if len(labels) == 0:\n", + " labels = list(np.unique(y))\n", + " for label in labels:\n", + " mean_sample.append(np.mean(X[y == label] , axis=0)) # Данные класса 1\n", + " #ax = plt.gca()\n", + " channels = [f'Channel {x}' for x in range(n_channel)]\n", + " df = pd.DataFrame(mean_sample).T\n", + " df.columns = labels\n", + " df.plot(kind ='line',subplots=True, layout=(1,len(labels)),figsize=(20,10))\n", + " plt.legend(fontsize='small')\n", + " plt.legend(loc='upper 
left', bbox_to_anchor=(1, 1))\n", + " plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "def plot_mean_sample_multi(X,y, labels:list = [], n_channel: int = None):\n", + " mean_sample = {}\n", + " if len(labels) == 0:\n", + " labels = list(np.unique(y))\n", + " if n_channel is None:\n", + " n_channel = X.shape[1]\n", + " channels = [f'Channel {x}' for x in range(n_channel)]\n", + " for label in labels:\n", + " mask = y == label\n", + " for chn in range(n_channel):\n", + " mean_sample.update({f'Label_{label}_channel_{chn}':np.mean(X[mask.flatten(),chn,:] , axis=0)}) # Данные класса 1\n", + " #ax = plt.gca()\n", + " df = pd.DataFrame(mean_sample)\n", + " df.plot(kind ='line')\n", + " plt.suptitle('Усреднённые семплы по классам')\n", + " plt.legend(fontsize='small')\n", + " plt.legend(loc='upper left', bbox_to_anchor=(1, 1))\n", + " plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "finetune = False\n", + "metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')\n", + "api_config = dict(problem='classification',\n", + " metric='accuracy',\n", + " timeout=1,\n", + " pop_size=10,\n", + " with_tunig=False,\n", + " n_jobs=2,\n", + " logging_level=20)\n", + "pipeline_creator = AbstractPipeline(task='classification')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Классификация с помощью геометрических преобразований" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": 
[ + "ECG = 'ECG200'\n", + "topological_model = ['topological_extractor', 'rf']\n", + "recurrence_model = ['recurrence_extractor', 'quantile_extractor', 'rf']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "ecg_dataset = pipeline_creator.create_input_data(ECG)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Topo Hyperparams" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "topological_params ={'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]},\n", + " 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}}," + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "stat_params = {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]},\n", + " 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]},\n", + " 'add_global_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "recurrence_params = {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]},\n", + " 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]},\n", + " 'rec_metric': (hp.choice, [['cosine', 'euclidean']]),\n", + " 'image_mode': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}," + ], + "metadata": { + "collapsed": 
false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "rec_metric = 'cosine'\n", + "image_mode = True\n", + "window_size = 10\n", + "stride = 1" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [ + "topological_node_dict = {'topological_extractor':{'window_size':window_size,\n", + " 'stride':stride}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "recurrence_node_dict = {'recurrence_extractor':{'window_size':window_size,\n", + " 'stride':stride,\n", + " 'rec_metric':rec_metric,\n", + " 'image_mode':image_mode}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [], + "source": [ + "feature_extractor = pipeline_creator.create_pipeline(topological_node_dict)\n", + "feature_matrix = feature_extractor.fit(ecg_dataset[0])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_mean_sample(ecg_dataset[0].features,ecg_dataset[0].target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "feature_extractor = pipeline_creator.create_pipeline(recurrence_node_dict)\n", + "feature_matrix = feature_extractor.fit(ecg_dataset[0])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "data": { + "text/plain": "(100, 3, 87, 87)" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_matrix.predict.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "explain_config = {'method': 'recurrence',\n", + " 'samples': 1,\n", + " 'metric': 'mean'}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "from fedot_ind.tools.explain.explain import RecurrenceExplainer\n", + "\n", + "rec_explain = RecurrenceExplainer(model=feature_extractor,\n", + " features=feature_matrix.predict,\n", + " target=ecg_dataset[0].target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [], + "source": [ + "rec_explain.explain(**explain_config)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "rec_explain.visual()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + 
"cell_type": "code", + "execution_count": 23, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from fedot_ind.core.operation.transformation.data.kernel_matrix import colorise\n", + "metric= 'mean'\n", + "name = 'test'\n", + "for classes, rec_matrix in rec_explain.rec_matrix_by_cls.items():\n", + " aggregated_rec_matrix = rec_explain.aggregate_func[metric](rec_matrix, axis=0)\n", + " aggregated_rec_matrix = colorise(aggregated_rec_matrix)\n", + " plt.imshow(aggregated_rec_matrix.T)\n", + " plt.colorbar()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [], + "source": [ + "topological_list_model = {'topological_extractor':{'window_size':window_size,\n", + " 'stride':stride},\n", + " 'logit':{}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating Dask Server\n", + "2024-11-04 12:17:22,942 - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n", + "2024-11-04 12:17:22,978 - State start\n", + "2024-11-04 12:17:22,988 - Scheduler at: inproc://10.64.4.172/5996/1\n", + "2024-11-04 12:17:22,989 - dashboard at: http://10.64.4.172:8787/status\n", + "2024-11-04 12:17:22,990 - Registering Worker plugin shuffle\n", + "2024-11-04 12:17:23,009 - Start worker at: inproc://10.64.4.172/5996/4\n", + "2024-11-04 12:17:23,010 - Listening to: inproc10.64.4.172\n", + "2024-11-04 12:17:23,011 - Worker name: 0\n", + "2024-11-04 12:17:23,012 - dashboard at: 10.64.4.172:61698\n", + "2024-11-04 12:17:23,012 - Waiting to connect to: inproc://10.64.4.172/5996/1\n", + "2024-11-04 12:17:23,013 - -------------------------------------------------\n", + "2024-11-04 12:17:23,014 - Threads: 8\n", + "2024-11-04 12:17:23,014 - Memory: 31.95 
GiB\n", + "2024-11-04 12:17:23,015 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-ofvxoj7v\n", + "2024-11-04 12:17:23,016 - -------------------------------------------------\n", + "2024-11-04 12:17:23,018 - Register worker \n", + "2024-11-04 12:17:23,020 - Starting worker compute stream, inproc://10.64.4.172/5996/4\n", + "2024-11-04 12:17:23,021 - Starting established connection to inproc://10.64.4.172/5996/5\n", + "2024-11-04 12:17:23,022 - Starting Worker plugin shuffle\n", + "2024-11-04 12:17:23,023 - Registered to: inproc://10.64.4.172/5996/1\n", + "2024-11-04 12:17:23,023 - -------------------------------------------------\n", + "2024-11-04 12:17:23,024 - Starting established connection to inproc://10.64.4.172/5996/1\n", + "2024-11-04 12:17:23,028 - Receive client connection: Client-99a03bfb-9a8d-11ef-976c-b42e99a00ea1\n", + "2024-11-04 12:17:23,030 - Starting established connection to inproc://10.64.4.172/5996/6\n", + "AssumptionsHandler - Initial pipeline fitting started\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 0.6 MiB, max: 0.7 MiB\n", + "ApiComposer - Initial pipeline was fitted in 7.2 sec.\n", + "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca'].\n", + "ApiComposer - Timeout is too small for composing and is skipped because fit_time is 7.248042 sec.\n", + "DataSourceSplitter - K-folds cross validation is applied.\n", + "ApiComposer - Hyperparameters tuning started with 1 min. timeout\n", + "DaskOptunaTuner - Hyperparameters optimization start: estimation of metric for initial graph\n", + "DaskOptunaTuner - Initial graph: {'depth': 2, 'length': 2, 'nodes': [logit, topological_extractor]}\n", + "logit - {}\n", + "topological_extractor - {'window_size': 10, 'stride': 1} \n", + "Initial metric: [0.759]\n", + "2024-11-04 12:17:32,500 - Run out-of-band function '_register_with_scheduler'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-11-04 12:17:32,502] A new study created in memory with name: no-name-3fe59ef6-c84c-4c3b-88a4-1ddc5fdd9233\n" + ] + }, + { + "data": { + "text/plain": " 0%| | 0/100 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
accuracyf1precision
00.560.00.249
\n" + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dict_stat['metrics']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "data": { + "text/plain": "(100, 1, 96)" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ecg_dataset[0].features.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "multimodal_pipeline = {'recurrence_extractor': {\n", + " 'window_size': 30,\n", + " 'stride': 5,\n", + " 'image_mode': True},\n", + " 'resnet_model': {\n", + " 'epochs': 50,\n", + " 'batch_size': 8,\n", + " 'model_name': 'ResNet50'}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating Dask Server\n", + "2024-11-04 12:44:17,000 - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n", + "2024-11-04 12:44:17,032 - State start\n", + "2024-11-04 12:44:17,048 - Scheduler at: inproc://10.64.4.172/21776/1\n", + "2024-11-04 12:44:17,049 - dashboard at: http://10.64.4.172:8787/status\n", + "2024-11-04 12:44:17,050 - Registering Worker plugin shuffle\n", + "2024-11-04 12:44:17,061 - Start worker at: inproc://10.64.4.172/21776/4\n", + "2024-11-04 12:44:17,062 - Listening to: inproc10.64.4.172\n", + "2024-11-04 12:44:17,063 - Worker name: 0\n", + "2024-11-04 12:44:17,063 - dashboard at: 10.64.4.172:63240\n", + "2024-11-04 12:44:17,064 - Waiting to connect to: inproc://10.64.4.172/21776/1\n", + "2024-11-04 12:44:17,064 - -------------------------------------------------\n", + 
"2024-11-04 12:44:17,065 - Threads: 8\n", + "2024-11-04 12:44:17,066 - Memory: 31.95 GiB\n", + "2024-11-04 12:44:17,067 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-woqorv18\n", + "2024-11-04 12:44:17,067 - -------------------------------------------------\n", + "2024-11-04 12:44:17,070 - Register worker \n", + "2024-11-04 12:44:17,072 - Starting worker compute stream, inproc://10.64.4.172/21776/4\n", + "2024-11-04 12:44:17,072 - Starting established connection to inproc://10.64.4.172/21776/5\n", + "2024-11-04 12:44:17,073 - Starting Worker plugin shuffle\n", + "2024-11-04 12:44:17,074 - Registered to: inproc://10.64.4.172/21776/1\n", + "2024-11-04 12:44:17,075 - -------------------------------------------------\n", + "2024-11-04 12:44:17,076 - Starting established connection to inproc://10.64.4.172/21776/1\n", + "2024-11-04 12:44:17,080 - Receive client connection: Client-5bad0a97-9a91-11ef-9510-b42e99a00ea1\n", + "2024-11-04 12:44:17,082 - Starting established connection to inproc://10.64.4.172/21776/6\n", + "AssumptionsHandler - Initial pipeline fitting started\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.14it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 1, Accuracy= 0.45454545454545453, Training Loss: 0.80\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.24it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 2, Accuracy= 0.6545454545454545, Training Loss: 0.64\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.09it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 3, Accuracy= 0.7090909090909091, Training Loss: 0.69\n", + "EarlyStopping counter: 1 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.30it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 4, Accuracy= 0.7090909090909091, Training Loss: 0.60\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.72it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 5, Accuracy= 0.7272727272727273, Training Loss: 0.60\n", + "EarlyStopping counter: 1 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.54it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 6, Accuracy= 0.6363636363636364, Training Loss: 0.64\n", + "EarlyStopping counter: 2 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.64it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 7, Accuracy= 0.7636363636363637, Training Loss: 0.52\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.57it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 8, Accuracy= 0.6909090909090909, Training Loss: 0.67\n", + "EarlyStopping counter: 1 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.78it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 9, Accuracy= 0.7272727272727273, Training Loss: 0.54\n", + "EarlyStopping counter: 2 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.29it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 10, Accuracy= 0.7818181818181819, Training Loss: 0.47\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.64it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 11, Accuracy= 0.7090909090909091, Training Loss: 0.58\n", + "EarlyStopping counter: 1 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.69it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 12, Accuracy= 0.7454545454545455, Training Loss: 0.50\n", + "EarlyStopping counter: 2 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.54it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 13, Accuracy= 0.7636363636363637, Training Loss: 0.52\n", + "EarlyStopping counter: 3 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.70it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 14, Accuracy= 0.8, Training Loss: 0.47\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.56it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 15, Accuracy= 0.6727272727272727, Training Loss: 0.58\n", + "EarlyStopping counter: 1 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.48it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 16, Accuracy= 0.7090909090909091, Training Loss: 0.58\n", + "EarlyStopping counter: 2 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.73it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 17, Accuracy= 0.8, Training Loss: 0.49\n", + "EarlyStopping counter: 3 out of 
15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.51it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 18, Accuracy= 0.7636363636363637, Training Loss: 0.45\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 2.96it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 19, Accuracy= 0.8363636363636363, Training Loss: 0.44\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:03<00:00, 2.15it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 20, Accuracy= 0.8545454545454545, Training Loss: 0.37\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 2.52it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 21, Accuracy= 0.6545454545454545, Training Loss: 0.71\n", + "EarlyStopping counter: 1 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.48it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 22, Accuracy= 0.7272727272727273, Training Loss: 0.49\n", + "EarlyStopping counter: 2 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.55it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 23, Accuracy= 0.7818181818181819, Training Loss: 0.51\n", + "EarlyStopping counter: 3 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.61it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 24, Accuracy= 0.7272727272727273, Training Loss: 0.58\n", + "EarlyStopping counter: 4 out 
of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.46it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 25, Accuracy= 0.7090909090909091, Training Loss: 0.72\n", + "EarlyStopping counter: 5 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.55it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 26, Accuracy= 0.7454545454545455, Training Loss: 0.52\n", + "EarlyStopping counter: 6 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.67it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 27, Accuracy= 0.7090909090909091, Training Loss: 0.63\n", + "EarlyStopping counter: 7 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.69it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 28, Accuracy= 0.7090909090909091, Training Loss: 0.50\n", + "EarlyStopping counter: 8 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.60it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 29, Accuracy= 0.7090909090909091, Training Loss: 0.66\n", + "EarlyStopping counter: 9 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.64it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 30, Accuracy= 0.6545454545454545, Training Loss: 0.76\n", + "EarlyStopping counter: 10 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:02<00:00, 3.36it/s]\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Epoch: 31, Accuracy= 0.7272727272727273, Training Loss: 0.64\n", + "EarlyStopping counter: 11 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.64it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 32, Accuracy= 0.6727272727272727, Training Loss: 0.72\n", + "EarlyStopping counter: 12 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.62it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 33, Accuracy= 0.6727272727272727, Training Loss: 0.70\n", + "EarlyStopping counter: 13 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.56it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 34, Accuracy= 0.6727272727272727, Training Loss: 0.87\n", + "EarlyStopping counter: 14 out of 15\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 7/7 [00:01<00:00, 3.54it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 35, Accuracy= 0.8181818181818182, Training Loss: 0.48\n", + "EarlyStopping counter: 15 out of 15\n", + "Early stopping\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 2.3 MiB, max: 196.5 MiB\n", + "ApiComposer - Initial pipeline was fitted in 257.7 sec.\n", + "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca'].\n", + "ApiComposer - Timeout is too small for composing and is skipped because fit_time is 257.743017 sec.\n", + "DataSourceSplitter - K-folds cross validation is applied.\n", + "ApiComposer - Time for pipeline composing was 0:00:00.\n", + "The remaining 0.4 seconds are not enough to tune the hyperparameters.\n", + "ApiComposer - Composed pipeline returned without tuning.\n", + "ApiComposer - Model generation finished\n", + "FEDOT logger - Already fitted initial pipeline is used\n", + "FEDOT logger - Final pipeline: {'depth': 2, 'length': 2, 'nodes': [resnet_model, recurrence_extractor]}\n", + "resnet_model - {'epochs': 50, 'batch_size': 8, 'model_name': 'ResNet50'}\n", + "recurrence_extractor - {'window_size': 30, 'stride': 5, 'image_mode': True}\n", + "MemoryAnalytics - Memory consumption for finish in main session: current 2.3 MiB, max: 196.5 MiB\n", + "FEDOT logger - Predictions was saved in current directory.\n", + "FEDOT logger - Predictions was saved in current directory.\n" + ] + } + ], + "source": [ + "result_dict = ApiTemplate(api_config=api_config,\n", + " metric_list=('f1', 'accuracy')).eval(dataset=ECG,\n", + " finetune=finetune,\n", + " initial_assumption=multimodal_pipeline)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [ + { + "data": { + "text/plain": " accuracy f1\n0 0.63 0.0", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
accuracyf1
00.630.0
\n
" + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dict['metrics']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Прогнозирование с помощью топологических признаков" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [], + "source": [ + "horizon = 365\n", + "PATH = Path(PROJECT_PATH, 'examples', 'data', 'ices_areas_ts.csv')\n", + "time_series_df = pd.read_csv(PATH).iloc[:, 1:]\n", + "target_series = time_series_df['Карское'].values\n", + "input_data = InputData.from_numpy_time_series(target_series,task=Task(TaskTypesEnum.ts_forecasting,task_params=TsForecastingParams(forecast_length=horizon)))\n", + "train_data, test_data = train_test_data_setup(input_data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Построение бейзлайна и топологической модели" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels\n", + "repo = IndustrialModels().setup_repository()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LaggedTransformationImplementation - Window size of lagged transformation was changed by WindowSizeSelector from 0 to 1095\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + 
"\u001B[1;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)", + "Cell \u001B[1;32mIn[28], line 2\u001B[0m\n\u001B[0;32m 1\u001B[0m pipeline_based \u001B[38;5;241m=\u001B[39m PipelineBuilder()\u001B[38;5;241m.\u001B[39madd_node(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mlagged\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39madd_node(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtreg\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39mbuild()\n\u001B[1;32m----> 2\u001B[0m \u001B[43mpipeline_based\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtrain_data\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 4\u001B[0m topological_pipeline \u001B[38;5;241m=\u001B[39m PipelineBuilder()\u001B[38;5;241m.\u001B[39madd_node(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mlagged\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39madd_node(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtopological_features\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39madd_node(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mlagged\u001B[39m\u001B[38;5;124m'\u001B[39m, branch_idx\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m2\u001B[39m)\u001B[38;5;241m.\u001B[39mjoin_branches(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mtreg\u001B[39m\u001B[38;5;124m'\u001B[39m)\u001B[38;5;241m.\u001B[39mbuild()\n\u001B[0;32m 5\u001B[0m topological_pipeline\u001B[38;5;241m.\u001B[39mfit(train_data)\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\fedot\\core\\pipelines\\pipeline.py:197\u001B[0m, in \u001B[0;36mPipeline.fit\u001B[1;34m(self, input_data, time_constraint, n_jobs)\u001B[0m\n\u001B[0;32m 194\u001B[0m copied_input_data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_assign_data_to_nodes(copied_input_data)\n\u001B[0;32m 196\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m time_constraint 
\u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m--> 197\u001B[0m train_predicted \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_fit\u001B[49m\u001B[43m(\u001B[49m\u001B[43minput_data\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcopied_input_data\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 198\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 199\u001B[0m train_predicted \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_fit_with_time_limit(input_data\u001B[38;5;241m=\u001B[39mcopied_input_data, time\u001B[38;5;241m=\u001B[39mtime_constraint)\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\fedot\\core\\pipelines\\pipeline.py:112\u001B[0m, in \u001B[0;36mPipeline._fit\u001B[1;34m(self, input_data, process_state_dict, fitted_operations)\u001B[0m\n\u001B[0;32m 110\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Timer() \u001B[38;5;28;01mas\u001B[39;00m t:\n\u001B[0;32m 111\u001B[0m computation_time_update \u001B[38;5;241m=\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mroot_node\u001B[38;5;241m.\u001B[39mfitted_operation \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcomputation_time \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m\n\u001B[1;32m--> 112\u001B[0m train_predicted \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mroot_node\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43minput_data\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minput_data\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 113\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m computation_time_update:\n\u001B[0;32m 114\u001B[0m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcomputation_time \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mround\u001B[39m(t\u001B[38;5;241m.\u001B[39mminutes_from_start, \u001B[38;5;241m3\u001B[39m)\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\fedot\\core\\pipelines\\node.py:200\u001B[0m, in \u001B[0;36mPipelineNode.fit\u001B[1;34m(self, input_data)\u001B[0m\n\u001B[0;32m 198\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfitted_operation \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 199\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Timer() \u001B[38;5;28;01mas\u001B[39;00m t:\n\u001B[1;32m--> 200\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfitted_operation, operation_predict \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mparams\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_parameters\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 201\u001B[0m \u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43minput_data\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 202\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfit_time_in_seconds \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mround\u001B[39m(t\u001B[38;5;241m.\u001B[39mseconds_from_start, \u001B[38;5;241m3\u001B[39m)\n\u001B[0;32m 203\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\fedot\\core\\operations\\operation.py:87\u001B[0m, in \u001B[0;36mOperation.fit\u001B[1;34m(self, params, data)\u001B[0m\n\u001B[0;32m 75\u001B[0m 
\u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"This method is used for defining and running of the evaluation strategy\u001B[39;00m\n\u001B[0;32m 76\u001B[0m \u001B[38;5;124;03mto train the operation with the data provided\u001B[39;00m\n\u001B[0;32m 77\u001B[0m \n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 83\u001B[0m \u001B[38;5;124;03m tuple: trained operation and prediction on train data\u001B[39;00m\n\u001B[0;32m 84\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m 85\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_init(data\u001B[38;5;241m.\u001B[39mtask, params\u001B[38;5;241m=\u001B[39mparams, n_samples_data\u001B[38;5;241m=\u001B[39mdata\u001B[38;5;241m.\u001B[39mfeatures\u001B[38;5;241m.\u001B[39mshape[\u001B[38;5;241m0\u001B[39m])\n\u001B[1;32m---> 87\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfitted_operation \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_eval_strategy\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtrain_data\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdata\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 89\u001B[0m predict_train \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mpredict_for_fit(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfitted_operation, data, params)\n\u001B[0;32m 91\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfitted_operation, predict_train\n", + "File \u001B[1;32mD:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\core\\operation\\interfaces\\industrial_model_strategy.py:150\u001B[0m, in \u001B[0;36mIndustrialSkLearnEvaluationStrategy.fit\u001B[1;34m(self, train_data)\u001B[0m\n\u001B[0;32m 148\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfit\u001B[39m(\u001B[38;5;28mself\u001B[39m, train_data: InputData):\n\u001B[0;32m 149\u001B[0m 
train_data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmulti_dim_dispatcher\u001B[38;5;241m.\u001B[39m_convert_input_data(train_data)\n\u001B[1;32m--> 150\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmulti_dim_dispatcher\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtrain_data\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[1;32mD:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\core\\operation\\interfaces\\industrial_preprocessing_strategy.py:209\u001B[0m, in \u001B[0;36mMultiDimPreprocessingStrategy.fit\u001B[1;34m(self, train_data)\u001B[0m\n\u001B[0;32m 204\u001B[0m fit_for_every_dim \u001B[38;5;241m=\u001B[39m curry(\u001B[38;5;241m2\u001B[39m)(\u001B[38;5;28;01mlambda\u001B[39;00m data, prev_state: \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_list_of_fitted_model(data, prev_state)\n\u001B[0;32m 205\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m operation_for_every_dim \u001B[38;5;28;01melse\u001B[39;00m prev_state)\n\u001B[0;32m 207\u001B[0m fit_multidim \u001B[38;5;241m=\u001B[39m curry(\u001B[38;5;241m2\u001B[39m)(\u001B[38;5;28;01mlambda\u001B[39;00m data, prev_state: prev_state\u001B[38;5;241m.\u001B[39mfit(data) \u001B[38;5;28;01mif\u001B[39;00m operation_for_multidim \u001B[38;5;28;01melse\u001B[39;00m prev_state)\n\u001B[1;32m--> 209\u001B[0m trained_operation \u001B[38;5;241m=\u001B[39m \u001B[43mEither\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43minsert\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtrain_data\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m \u001B[49m\u001B[43m\\\u001B[49m\n\u001B[0;32m 210\u001B[0m \u001B[43m 
\u001B[49m\u001B[43mthen\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfit_one_dim\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_condition\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_implementation\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241m.\u001B[39m \\\n\u001B[0;32m 211\u001B[0m then(channel_independent_branch(train_data))\u001B[38;5;241m.\u001B[39m \\\n\u001B[0;32m 212\u001B[0m then(fit_for_every_dim(train_data))\u001B[38;5;241m.\u001B[39mthen(fit_multidim(train_data))\u001B[38;5;241m.\u001B[39mvalue\n\u001B[0;32m 214\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m trained_operation\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\pymonad\\monad.py:152\u001B[0m, in \u001B[0;36mMonad.then\u001B[1;34m(self, function)\u001B[0m\n\u001B[0;32m 132\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mthen\u001B[39m(\n\u001B[0;32m 133\u001B[0m \u001B[38;5;28mself\u001B[39m: \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mMonad[S]\u001B[39m\u001B[38;5;124m'\u001B[39m, function: Union[Callable[[S], T], Callable[[S], \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mMonad[T]\u001B[39m\u001B[38;5;124m'\u001B[39m]]\n\u001B[0;32m 134\u001B[0m ) \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m>\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mMonad[T]\u001B[39m\u001B[38;5;124m'\u001B[39m:\n\u001B[0;32m 135\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\" Combines the functionality of bind and fmap.\u001B[39;00m\n\u001B[0;32m 136\u001B[0m \n\u001B[0;32m 137\u001B[0m \u001B[38;5;124;03m Instead of worrying about whether to use bind or fmap,\u001B[39;00m\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 150\u001B[0m \u001B[38;5;124;03m A monad value of the same type as 'self'\u001B[39;00m\n\u001B[0;32m 151\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m--> 152\u001B[0m result 
\u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmap\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfunction\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 153\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m 154\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m result\u001B[38;5;241m.\u001B[39mjoin()\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\pymonad\\either.py:106\u001B[0m, in \u001B[0;36mEither.map\u001B[1;34m(self, function)\u001B[0m\n\u001B[0;32m 104\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\n\u001B[0;32m 105\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 106\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__class__\u001B[39m(\u001B[43mfunction\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mvalue\u001B[49m\u001B[43m)\u001B[49m, (\u001B[38;5;28;01mNone\u001B[39;00m, \u001B[38;5;28;01mTrue\u001B[39;00m))\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\pymonad\\tools.py:50\u001B[0m, in \u001B[0;36m_curry_helper.._curry_internal\u001B[1;34m(*arguments)\u001B[0m\n\u001B[0;32m 48\u001B[0m all_arguments\u001B[38;5;241m.\u001B[39mextend(arguments)\n\u001B[0;32m 49\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(all_arguments) \u001B[38;5;241m>\u001B[39m\u001B[38;5;241m=\u001B[39m number_of_arguments: \u001B[38;5;66;03m# pylint: disable=no-else-return\u001B[39;00m\n\u001B[1;32m---> 50\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunction_to_curry\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mall_arguments\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 51\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 52\u001B[0m 
\u001B[38;5;28;01mreturn\u001B[39;00m _curry_helper(number_of_arguments, function_to_curry, all_arguments)\n", + "File \u001B[1;32mD:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\core\\operation\\interfaces\\industrial_preprocessing_strategy.py:195\u001B[0m, in \u001B[0;36mMultiDimPreprocessingStrategy.fit..\u001B[1;34m(operation, init_state)\u001B[0m\n\u001B[0;32m 192\u001B[0m operation_for_multidim \u001B[38;5;241m=\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28many\u001B[39m([operation_for_one_dim, operation_for_every_dim])\n\u001B[0;32m 194\u001B[0m \u001B[38;5;66;03m# If model is classical sklearn model we use one_dimensional mode\u001B[39;00m\n\u001B[1;32m--> 195\u001B[0m fit_one_dim \u001B[38;5;241m=\u001B[39m curry(\u001B[38;5;241m2\u001B[39m)(\u001B[38;5;28;01mlambda\u001B[39;00m operation, init_state: \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit_one_sample\u001B[49m\u001B[43m(\u001B[49m\u001B[43minit_state\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 196\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m operation_for_one_dim \u001B[38;5;28;01melse\u001B[39;00m operation)\n\u001B[0;32m 198\u001B[0m \u001B[38;5;66;03m# Elif model could be use for each dimension(channel) independently we use channel_independent mode\u001B[39;00m\n\u001B[0;32m 199\u001B[0m channel_independent_branch \u001B[38;5;241m=\u001B[39m curry(\u001B[38;5;241m2\u001B[39m)(\u001B[38;5;28;01mlambda\u001B[39;00m data, prev_state: \u001B[38;5;28mlist\u001B[39m(deepcopy(prev_state) \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m\n\u001B[0;32m 200\u001B[0m \u001B[38;5;28mrange\u001B[39m(\u001B[38;5;28mlen\u001B[39m(data)))\n\u001B[0;32m 201\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m operation_for_every_dim \u001B[38;5;28;01melse\u001B[39;00m prev_state)\n", + "File \u001B[1;32mD:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\core\\operation\\interfaces\\industrial_preprocessing_strategy.py:157\u001B[0m, in 
\u001B[0;36mMultiDimPreprocessingStrategy.fit_one_sample\u001B[1;34m(self, train_data)\u001B[0m\n\u001B[0;32m 154\u001B[0m is_multi_target \u001B[38;5;241m=\u001B[39m is_multi_output_task(train_data)\n\u001B[0;32m 155\u001B[0m model_multi_adaptation \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mall\u001B[39m([is_model_not_support_multi, is_multi_target])\n\u001B[1;32m--> 157\u001B[0m operation_implementation \u001B[38;5;241m=\u001B[39m \u001B[43mEither\u001B[49m\u001B[43m(\u001B[49m\u001B[43mvalue\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mtrain_data\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 158\u001B[0m \u001B[43m \u001B[49m\u001B[43mmonoid\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43m[\u001B[49m\u001B[43mtrain_data\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mmodel_multi_adaptation\u001B[49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m \u001B[49m\u001B[43m\\\u001B[49m\n\u001B[0;32m 159\u001B[0m \u001B[43m \u001B[49m\u001B[43meither\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 160\u001B[0m \u001B[43m \u001B[49m\u001B[43mleft_function\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mlambda\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[43m:\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_condition\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_implementation\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 161\u001B[0m \u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfeatures\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtarget\u001B[49m\u001B[43m)\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43;01mif\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mnot_fedot_input_data\u001B[49m\u001B[43m 
\u001B[49m\u001B[38;5;28;43;01melse\u001B[39;49;00m\n\u001B[0;32m 162\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_condition\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_implementation\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdata\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 163\u001B[0m \u001B[43m \u001B[49m\u001B[43mright_function\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mlambda\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[43m:\u001B[49m\n\u001B[0;32m 164\u001B[0m \u001B[43m \u001B[49m\u001B[43mconvert_to_multivariate_model\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_condition\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_implementation\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 165\u001B[0m operation_implementation \u001B[38;5;241m=\u001B[39m operation_implementation \u001B[38;5;28;01mif\u001B[39;00m model_multi_adaptation \\\n\u001B[0;32m 166\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39moperation_condition\u001B[38;5;241m.\u001B[39moperation_implementation\n\u001B[0;32m 167\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m operation_implementation\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\pymonad\\either.py:91\u001B[0m, in \u001B[0;36mEither.either\u001B[1;34m(self, left_function, right_function)\u001B[0m\n\u001B[0;32m 89\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m right_function(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mvalue)\n\u001B[0;32m 90\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m---> 91\u001B[0m 
\u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mleft_function\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mmonoid\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;241;43m0\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[1;32mD:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\core\\operation\\interfaces\\industrial_preprocessing_strategy.py:160\u001B[0m, in \u001B[0;36mMultiDimPreprocessingStrategy.fit_one_sample..\u001B[1;34m(data)\u001B[0m\n\u001B[0;32m 154\u001B[0m is_multi_target \u001B[38;5;241m=\u001B[39m is_multi_output_task(train_data)\n\u001B[0;32m 155\u001B[0m model_multi_adaptation \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mall\u001B[39m([is_model_not_support_multi, is_multi_target])\n\u001B[0;32m 157\u001B[0m operation_implementation \u001B[38;5;241m=\u001B[39m Either(value\u001B[38;5;241m=\u001B[39mtrain_data,\n\u001B[0;32m 158\u001B[0m monoid\u001B[38;5;241m=\u001B[39m[train_data, model_multi_adaptation])\u001B[38;5;241m.\u001B[39m \\\n\u001B[0;32m 159\u001B[0m either(\n\u001B[1;32m--> 160\u001B[0m left_function\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mlambda\u001B[39;00m data: \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_condition\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43moperation_implementation\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 161\u001B[0m \u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfeatures\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdata\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtarget\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mif\u001B[39;00m not_fedot_input_data \u001B[38;5;28;01melse\u001B[39;00m\n\u001B[0;32m 162\u001B[0m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39moperation_condition\u001B[38;5;241m.\u001B[39moperation_implementation\u001B[38;5;241m.\u001B[39mfit(data),\n\u001B[0;32m 163\u001B[0m right_function\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mlambda\u001B[39;00m data:\n\u001B[0;32m 164\u001B[0m convert_to_multivariate_model(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39moperation_condition\u001B[38;5;241m.\u001B[39moperation_implementation, data))\n\u001B[0;32m 165\u001B[0m operation_implementation \u001B[38;5;241m=\u001B[39m operation_implementation \u001B[38;5;28;01mif\u001B[39;00m model_multi_adaptation \\\n\u001B[0;32m 166\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39moperation_condition\u001B[38;5;241m.\u001B[39moperation_implementation\n\u001B[0;32m 167\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m operation_implementation\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:473\u001B[0m, in \u001B[0;36mBaseForest.fit\u001B[1;34m(self, X, y, sample_weight)\u001B[0m\n\u001B[0;32m 462\u001B[0m trees \u001B[38;5;241m=\u001B[39m [\n\u001B[0;32m 463\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_make_estimator(append\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m, random_state\u001B[38;5;241m=\u001B[39mrandom_state)\n\u001B[0;32m 464\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mrange\u001B[39m(n_more_estimators)\n\u001B[0;32m 465\u001B[0m ]\n\u001B[0;32m 467\u001B[0m \u001B[38;5;66;03m# Parallel loop: we prefer the threading backend as the Cython code\u001B[39;00m\n\u001B[0;32m 468\u001B[0m \u001B[38;5;66;03m# for fitting the trees is internally releasing the Python GIL\u001B[39;00m\n\u001B[0;32m 469\u001B[0m \u001B[38;5;66;03m# making threading more efficient than multiprocessing in\u001B[39;00m\n\u001B[0;32m 
470\u001B[0m \u001B[38;5;66;03m# that case. However, for joblib 0.12+ we respect any\u001B[39;00m\n\u001B[0;32m 471\u001B[0m \u001B[38;5;66;03m# parallel_backend contexts set at a higher level,\u001B[39;00m\n\u001B[0;32m 472\u001B[0m \u001B[38;5;66;03m# since correctness does not rely on using threads.\u001B[39;00m\n\u001B[1;32m--> 473\u001B[0m trees \u001B[38;5;241m=\u001B[39m \u001B[43mParallel\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 474\u001B[0m \u001B[43m \u001B[49m\u001B[43mn_jobs\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mn_jobs\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 475\u001B[0m \u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mverbose\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 476\u001B[0m \u001B[43m \u001B[49m\u001B[43mprefer\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mthreads\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\n\u001B[0;32m 477\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 478\u001B[0m \u001B[43m \u001B[49m\u001B[43mdelayed\u001B[49m\u001B[43m(\u001B[49m\u001B[43m_parallel_build_trees\u001B[49m\u001B[43m)\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 479\u001B[0m \u001B[43m \u001B[49m\u001B[43mt\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 480\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbootstrap\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 481\u001B[0m \u001B[43m \u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 482\u001B[0m \u001B[43m \u001B[49m\u001B[43my\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 483\u001B[0m \u001B[43m \u001B[49m\u001B[43msample_weight\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 484\u001B[0m \u001B[43m 
\u001B[49m\u001B[43mi\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 485\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43mlen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtrees\u001B[49m\u001B[43m)\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 486\u001B[0m \u001B[43m \u001B[49m\u001B[43mverbose\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mverbose\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 487\u001B[0m \u001B[43m \u001B[49m\u001B[43mclass_weight\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mclass_weight\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 488\u001B[0m \u001B[43m \u001B[49m\u001B[43mn_samples_bootstrap\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mn_samples_bootstrap\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 489\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 490\u001B[0m \u001B[43m \u001B[49m\u001B[38;5;28;43;01mfor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[43mi\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mt\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01min\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43menumerate\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mtrees\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 491\u001B[0m \u001B[43m\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 493\u001B[0m \u001B[38;5;66;03m# Collect newly grown trees\u001B[39;00m\n\u001B[0;32m 494\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mestimators_\u001B[38;5;241m.\u001B[39mextend(trees)\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\sklearn\\utils\\parallel.py:63\u001B[0m, in \u001B[0;36mParallel.__call__\u001B[1;34m(self, iterable)\u001B[0m\n\u001B[0;32m 58\u001B[0m config \u001B[38;5;241m=\u001B[39m get_config()\n\u001B[0;32m 59\u001B[0m iterable_with_config \u001B[38;5;241m=\u001B[39m 
(\n\u001B[0;32m 60\u001B[0m (_with_config(delayed_func, config), args, kwargs)\n\u001B[0;32m 61\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m delayed_func, args, kwargs \u001B[38;5;129;01min\u001B[39;00m iterable\n\u001B[0;32m 62\u001B[0m )\n\u001B[1;32m---> 63\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;21;43m__call__\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43miterable_with_config\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\parallel.py:1918\u001B[0m, in \u001B[0;36mParallel.__call__\u001B[1;34m(self, iterable)\u001B[0m\n\u001B[0;32m 1916\u001B[0m output \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_get_sequential_output(iterable)\n\u001B[0;32m 1917\u001B[0m \u001B[38;5;28mnext\u001B[39m(output)\n\u001B[1;32m-> 1918\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m output \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mreturn_generator \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;43mlist\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43moutput\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1920\u001B[0m \u001B[38;5;66;03m# Let's create an ID that uniquely identifies the current call. 
If the\u001B[39;00m\n\u001B[0;32m 1921\u001B[0m \u001B[38;5;66;03m# call is interrupted early and that the same instance is immediately\u001B[39;00m\n\u001B[0;32m 1922\u001B[0m \u001B[38;5;66;03m# re-used, this id will be used to prevent workers that were\u001B[39;00m\n\u001B[0;32m 1923\u001B[0m \u001B[38;5;66;03m# concurrently finalizing a task from the previous call to run the\u001B[39;00m\n\u001B[0;32m 1924\u001B[0m \u001B[38;5;66;03m# callback.\u001B[39;00m\n\u001B[0;32m 1925\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_lock:\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\parallel.py:1847\u001B[0m, in \u001B[0;36mParallel._get_sequential_output\u001B[1;34m(self, iterable)\u001B[0m\n\u001B[0;32m 1845\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_dispatched_batches \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[0;32m 1846\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_dispatched_tasks \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[1;32m-> 1847\u001B[0m res \u001B[38;5;241m=\u001B[39m func(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n\u001B[0;32m 1848\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_completed_tasks \u001B[38;5;241m+\u001B[39m\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m1\u001B[39m\n\u001B[0;32m 1849\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mprint_progress()\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\sklearn\\utils\\parallel.py:123\u001B[0m, in \u001B[0;36m_FuncWrapper.__call__\u001B[1;34m(self, *args, **kwargs)\u001B[0m\n\u001B[0;32m 121\u001B[0m config \u001B[38;5;241m=\u001B[39m {}\n\u001B[0;32m 122\u001B[0m 
\u001B[38;5;28;01mwith\u001B[39;00m config_context(\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mconfig):\n\u001B[1;32m--> 123\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfunction(\u001B[38;5;241m*\u001B[39margs, \u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mkwargs)\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\sklearn\\ensemble\\_forest.py:186\u001B[0m, in \u001B[0;36m_parallel_build_trees\u001B[1;34m(tree, bootstrap, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight, n_samples_bootstrap)\u001B[0m\n\u001B[0;32m 184\u001B[0m tree\u001B[38;5;241m.\u001B[39mfit(X, y, sample_weight\u001B[38;5;241m=\u001B[39mcurr_sample_weight, check_input\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m)\n\u001B[0;32m 185\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m--> 186\u001B[0m \u001B[43mtree\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msample_weight\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msample_weight\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mcheck_input\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43;01mFalse\u001B[39;49;00m\u001B[43m)\u001B[49m\n\u001B[0;32m 188\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m tree\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\sklearn\\tree\\_classes.py:1247\u001B[0m, in \u001B[0;36mDecisionTreeRegressor.fit\u001B[1;34m(self, X, y, sample_weight, check_input)\u001B[0m\n\u001B[0;32m 1218\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfit\u001B[39m(\u001B[38;5;28mself\u001B[39m, X, y, sample_weight\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m, 
check_input\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m):\n\u001B[0;32m 1219\u001B[0m \u001B[38;5;250m \u001B[39m\u001B[38;5;124;03m\"\"\"Build a decision tree regressor from the training set (X, y).\u001B[39;00m\n\u001B[0;32m 1220\u001B[0m \n\u001B[0;32m 1221\u001B[0m \u001B[38;5;124;03m Parameters\u001B[39;00m\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 1244\u001B[0m \u001B[38;5;124;03m Fitted estimator.\u001B[39;00m\n\u001B[0;32m 1245\u001B[0m \u001B[38;5;124;03m \"\"\"\u001B[39;00m\n\u001B[1;32m-> 1247\u001B[0m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit\u001B[49m\u001B[43m(\u001B[49m\n\u001B[0;32m 1248\u001B[0m \u001B[43m \u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1249\u001B[0m \u001B[43m \u001B[49m\u001B[43my\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1250\u001B[0m \u001B[43m \u001B[49m\u001B[43msample_weight\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43msample_weight\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1251\u001B[0m \u001B[43m \u001B[49m\u001B[43mcheck_input\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mcheck_input\u001B[49m\u001B[43m,\u001B[49m\n\u001B[0;32m 1252\u001B[0m \u001B[43m \u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1253\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\n", + "File \u001B[1;32m~\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\sklearn\\tree\\_classes.py:379\u001B[0m, in \u001B[0;36mBaseDecisionTree.fit\u001B[1;34m(self, X, y, sample_weight, check_input)\u001B[0m\n\u001B[0;32m 368\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 369\u001B[0m builder \u001B[38;5;241m=\u001B[39m BestFirstTreeBuilder(\n\u001B[0;32m 370\u001B[0m splitter,\n\u001B[0;32m 371\u001B[0m min_samples_split,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 376\u001B[0m 
\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mmin_impurity_decrease,\n\u001B[0;32m 377\u001B[0m )\n\u001B[1;32m--> 379\u001B[0m \u001B[43mbuilder\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbuild\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtree_\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mX\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43my\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msample_weight\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 381\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_outputs_ \u001B[38;5;241m==\u001B[39m \u001B[38;5;241m1\u001B[39m \u001B[38;5;129;01mand\u001B[39;00m is_classifier(\u001B[38;5;28mself\u001B[39m):\n\u001B[0;32m 382\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_classes_ \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mn_classes_[\u001B[38;5;241m0\u001B[39m]\n", + "\u001B[1;31mKeyboardInterrupt\u001B[0m: " + ] + } + ], + "source": [ + "pipeline_based = PipelineBuilder().add_node('lagged').add_node('treg').build()\n", + "pipeline_based.fit(train_data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "topological_pipeline = PipelineBuilder().add_node('lagged').add_node('topological_extractor').add_node('lagged', branch_idx=2).join_branches('treg').build()\n", + "topological_pipeline.fit(train_data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "### Прогноз" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + 
"forecast_base = np.ravel(pipeline_based.predict(test_data).predict)\n", + "forecast_topo = np.ravel(topological_pipeline.predict(test_data).predict)\n", + "\n", + "forecast_base[forecast_base < 0] = 0\n", + "forecast_topo[forecast_topo < 0] = 0" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Визуализация прогнозов" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + }, + "execution_count": 1 + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "plt.plot(input_data.features, label='real data')\n", + "plt.plot(np.arange(len(target_series) - horizon, len(target_series)),\n", + " forecast_base, label='forecast base')\n", + "plt.plot(np.arange(len(target_series) - horizon, len(target_series)),\n", + " forecast_topo, label='forecast topo')\n", + "\n", + "plt.grid()\n", + "plt.legend()\n", + "plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "print('base')\n", + "print(mean_squared_error(test_data.target, forecast_base, squared=False))\n", + "print(\n", + " mean_absolute_percentage_error(\n", + " test_data.target +\n", + " 1000,\n", + " forecast_base +\n", + " 1000))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "print('topo')\n", + "print(mean_squared_error(test_data.target, forecast_topo, squared=False))\n", + "print(\n", + " mean_absolute_percentage_error(\n", + " test_data.target +\n", + " 1000,\n", + " forecast_topo +\n", + " 1000))" + ], + "metadata": { + "collapsed": 
false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/tutorial/time_series/ts_classification/classification_example_basic.ipynb b/examples/tutorial/time_series/ts_classification/classification_example_basic.ipynb new file mode 100644 index 000000000..5eac38f81 --- /dev/null +++ b/examples/tutorial/time_series/ts_classification/classification_example_basic.ipynb @@ -0,0 +1,1634 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from hyperopt import hp\n", + "from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline, ApiTemplate\n", + "from fedot_ind.core.repository.constanst_repository import SPECTRUM_ESTIMATORS\n", + "from fedot_ind.core.repository.constanst_repository import DISCRETE_WAVELETS, CONTINUOUS_WAVELETS" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "def plot_mean_sample(X,y, labels:list = [], n_channel: int = 1):\n", + " mean_sample = []\n", + " if len(labels) == 
0:\n", + " labels = list(np.unique(y))\n", + " for label in labels:\n", + " mean_sample.append(np.mean(X[y == label] , axis=0)) # Данные класса 1\n", + " #ax = plt.gca()\n", + " channels = [f'Channel {x}' for x in range(n_channel)]\n", + " df = pd.DataFrame(mean_sample).T\n", + " df.columns = labels\n", + " df.plot(kind ='line',subplots=True, layout=(1,len(labels)),figsize=(20,10))\n", + " plt.legend(fontsize='small')\n", + " plt.legend(loc='upper left', bbox_to_anchor=(1, 1))\n", + " plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "def plot_mean_sample_multi(X,y, labels:list = [], n_channel: int = None):\n", + " mean_sample = {}\n", + " if len(labels) == 0:\n", + " labels = list(np.unique(y))\n", + " if n_channel is None:\n", + " n_channel = X.shape[1]\n", + " channels = [f'Channel {x}' for x in range(n_channel)]\n", + " for label in labels:\n", + " mask = y == label\n", + " for chn in range(n_channel):\n", + " mean_sample.update({f'Label_{label}_channel_{chn}':np.mean(X[mask.flatten(),chn,:] , axis=0)}) # Данные класса 1\n", + " #ax = plt.gca()\n", + " df = pd.DataFrame(mean_sample)\n", + " df.plot(kind ='line')\n", + " plt.suptitle('Усреднённые семплы по классам')\n", + " plt.legend(fontsize='small')\n", + " plt.legend(loc='upper left', bbox_to_anchor=(1, 1))\n", + " plt.show()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "finetune = False\n", + "metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')\n", + "api_config = dict(problem='classification',\n", + " metric='accuracy',\n", + " timeout=1,\n", + " pop_size=10,\n", + " with_tunig=False,\n", + " n_jobs=2,\n", + " logging_level=20)\n", + "pipeline_creator = AbstractPipeline(task='classification')" + ], + "metadata": { + "collapsed": false, + 
"pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Our datasets and models for experiments" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "easy_to_clf_uno = 'ItalyPowerDemand'\n", + "hard_to_clf_uno = 'ElectricDevices'\n", + "easy_to_clf_multi = 'BasicMotions'\n", + "hard_to_clf_multi = 'AtrialFibrillation'\n", + "node_list_model = ['quantile_extractor','logit']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Our datasets" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "easy_to_clf_uno_dataset = pipeline_creator.create_input_data(easy_to_clf_uno)\n", + "hard_to_clf_uno_dataset = pipeline_creator.create_input_data(hard_to_clf_uno)\n", + "easy_to_clf_multi_dataset = pipeline_creator.create_input_data(easy_to_clf_multi)\n", + "hard_to_clf_multi_dataset = pipeline_creator.create_input_data(hard_to_clf_multi)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Lets Visualise our data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Easy to clf data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_mean_sample(easy_to_clf_uno_dataset[0].features,easy_to_clf_uno_dataset[0].target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Hard to clf data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_mean_sample(hard_to_clf_uno_dataset[0].features,hard_to_clf_uno_dataset[0].target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_mean_sample_multi(easy_to_clf_multi_dataset[0].features,easy_to_clf_multi_dataset[0].target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Transform initial row in feature vector. Easy dataset" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "stat_pipeline = pipeline_creator.create_pipeline(node_list_model)\n", + "feature_extractor = pipeline_creator.create_pipeline(['quantile_extractor'])\n", + "feature_matrix = feature_extractor.fit(easy_to_clf_uno_dataset[0])\n", + "initial_ts, transformed_ts = pd.DataFrame(feature_matrix.features.squeeze()),pd.DataFrame(feature_matrix.predict.squeeze())\n", + "transformed_ts['target'] = feature_matrix.target" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [ + "node_dict = {'quantile_extractor':{'window_size':10,\n", + " 'stride':50}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [ + { + "data": { + "text/plain": " 0 1 2 3 4 5 6 \\\ntarget \n1.0 -0.532565 -1.170139 3.0 0.097182 0.785926 1.667704 0.958333 \n2.0 0.056140 -0.909639 3.0 0.075741 0.892806 1.435281 0.958333 \n\n 7 8 9 ... 18 19 20 \\\ntarget ... \n1.0 0.083333 0.898154 4.501629 ... 0.743115 1.125000e-09 0.416926 \n2.0 0.125000 0.841321 4.168296 ... 0.768841 -6.666667e-10 -0.030488 \n\n 21 22 23 24 25 26 27 \ntarget \n1.0 0.978945 1.417548 -1.641826 -1.608472 -0.997747 0.669957 1.205923 \n2.0 0.978945 1.714364 -1.550198 -1.541755 -0.762200 0.673081 1.489222 \n\n[2 rows x 28 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0123456789...18192021222324252627
target
1.0-0.532565-1.1701393.00.0971820.7859261.6677040.9583330.0833330.8981544.501629...0.7431151.125000e-090.4169260.9789451.417548-1.641826-1.608472-0.9977470.6699571.205923
2.00.056140-0.9096393.00.0757410.8928061.4352810.9583330.1250000.8413214.168296...0.768841-6.666667e-10-0.0304880.9789451.714364-1.550198-1.541755-0.7622000.6730811.489222
\n

2 rows × 28 columns

\n
" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformed_ts.groupby(by='target').first()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating Dask Server\n", + "2024-10-21 14:55:37,211 - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n", + "2024-10-21 14:55:37,217 - State start\n", + "2024-10-21 14:55:37,220 - Found stale lock file and directory 'C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\dask-scratch-space\\\\scheduler-ub6_f_nr', purging\n", + "2024-10-21 14:55:37,221 - Found stale lock file and directory 'C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\dask-scratch-space\\\\scheduler-wq5_42et', purging\n", + "2024-10-21 14:55:37,223 - Found stale lock file and directory 'C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\dask-scratch-space\\\\worker-5pg8n1u_', purging\n", + "2024-10-21 14:55:37,224 - Found stale lock file and directory 'C:\\\\Users\\\\user\\\\AppData\\\\Local\\\\Temp\\\\dask-scratch-space\\\\worker-a8c6mb2y', purging\n", + "2024-10-21 14:55:37,232 - Scheduler at: inproc://10.64.4.172/21832/1\n", + "2024-10-21 14:55:37,233 - dashboard at: http://10.64.4.172:8787/status\n", + "2024-10-21 14:55:37,233 - Registering Worker plugin shuffle\n", + "2024-10-21 14:55:37,246 - Start worker at: inproc://10.64.4.172/21832/4\n", + "2024-10-21 14:55:37,247 - Listening to: inproc10.64.4.172\n", + "2024-10-21 14:55:37,248 - Worker name: 0\n", + "2024-10-21 14:55:37,249 - dashboard at: 10.64.4.172:58597\n", + "2024-10-21 14:55:37,249 - Waiting to connect to: inproc://10.64.4.172/21832/1\n", + "2024-10-21 14:55:37,250 - -------------------------------------------------\n", + "2024-10-21 14:55:37,250 - Threads: 8\n", + "2024-10-21 
14:55:37,251 - Memory: 31.95 GiB\n", + "2024-10-21 14:55:37,252 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-p1tlllrf\n", + "2024-10-21 14:55:37,252 - -------------------------------------------------\n", + "2024-10-21 14:55:37,256 - Register worker \n", + "2024-10-21 14:55:37,259 - Starting worker compute stream, inproc://10.64.4.172/21832/4\n", + "2024-10-21 14:55:37,260 - Starting established connection to inproc://10.64.4.172/21832/5\n", + "2024-10-21 14:55:37,261 - Starting Worker plugin shuffle\n", + "2024-10-21 14:55:37,262 - Registered to: inproc://10.64.4.172/21832/1\n", + "2024-10-21 14:55:37,263 - -------------------------------------------------\n", + "2024-10-21 14:55:37,264 - Starting established connection to inproc://10.64.4.172/21832/1\n", + "2024-10-21 14:55:37,267 - Receive client connection: Client-62d9d225-8fa3-11ef-9548-b42e99a00ea1\n", + "2024-10-21 14:55:37,269 - Starting established connection to inproc://10.64.4.172/21832/6\n", + "AssumptionsHandler - Initial pipeline fitting started\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 0.4 MiB, max: 0.5 MiB\n", + "ApiComposer - Initial pipeline was fitted in 0.2 sec.\n", + "AssumptionsHandler - Preset was changed to best_quality due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", + "ApiComposer - Pipeline composition started.\n", + "DataSourceSplitter - K-folds cross validation is applied.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [00:00 on ((/n_quantile_extractor;)/n_topological_extractor;)/n_logit\n", + "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception on ((/n_fourier_basis;)/n_topological_extractor;;/n_quantile_extractor;)/n_logit\n", + "IndustrialDispatcher - 5 individuals out of 13 in previous population were evaluated successfully. 0.38461538461538464% is a fairly small percentage of successful evaluation.\n", + "IndustrialEvoOptimizer - Generation num: 1 size: 5\n", + "IndustrialEvoOptimizer - Best individuals: HallOfFame archive fitness (1): ['']\n", + "GroupedCondition - Optimisation stopped: Time limit is reached\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [00:36']\n", + "IndustrialEvoOptimizer - no improvements for 1 iterations\n", + "IndustrialEvoOptimizer - spent time: 0.6 min\n", + "GPComposer - GP composition finished\n", + "DataSourceSplitter - K-folds cross validation is applied.\n", + "ApiComposer - Hyperparameters tuning started with 0 min. 
timeout\n", + "SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial graph\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SimultaneousTuner - Initial graph: {'depth': 3, 'length': 4, 'nodes': [logit, quantile_extractor, channel_filtration, eigen_basis]}\n", + "logit - {}\n", + "quantile_extractor - {}\n", + "channel_filtration - {}\n", + "eigen_basis - {} \n", + "Initial metric: [0.956]\n", + " 0%| | 0/100000 [00:00 on ((/n_eigen_basis_{'low_rank_approximation': False, 'rank_regularization': 'hard_thresholding', 'window_size': 25};)/n_channel_filtration_{'centroid_metric': 'euclidean', 'distance': 'chebyshev', 'sample_metric': 'cosine', 'selection_strategy': 'sum'};;/n_quantile_extractor_{'add_global_features': True, 'stride': 2, 'window_size': 5};)/n_logit_{'C': 6.9870997389038, 'penalty': 'l2', 'solver': 'liblinear'}\n", + " 0%| | 1/100000 [00:00<5:40:02, 4.90trial/s, best loss: inf]2024-10-21 14:56:15,677 - build_posterior_wrapper took 0.020967 seconds\n", + "2024-10-21 14:56:15,680 - TPE using 1/1 trials with best loss inf\n", + "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. 
Exception on ((/n_eigen_basis_{'low_rank_approximation': False, 'rank_regularization': 'explained_dispersion', 'window_size': 10};)/n_channel_filtration_{'centroid_metric': 'euclidean', 'distance': 'chebyshev', 'sample_metric': 'euclidean', 'selection_strategy': 'sum'};;/n_quantile_extractor_{'add_global_features': False, 'stride': 9, 'window_size': 20};)/n_logit_{'C': 9.584642642281313, 'penalty': 'l2', 'solver': 'liblinear'}\n", + " 0%| | 2/100000 [00:00<8:43:20, 3.18trial/s, best loss: inf]2024-10-21 14:56:16,068 - build_posterior_wrapper took 0.021000 seconds\n", + "2024-10-21 14:56:16,070 - TPE using 2/2 trials with best loss inf\n", + "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception on ((/n_eigen_basis_{'low_rank_approximation': False, 'rank_regularization': 'hard_thresholding', 'window_size': 35};)/n_channel_filtration_{'centroid_metric': 'chebyshev', 'distance': 'manhattan', 'sample_metric': 'cityblock', 'selection_strategy': 'sum'};;/n_quantile_extractor_{'add_global_features': True, 'stride': 6, 'window_size': 15};)/n_logit_{'C': 0.0829525514507987, 'penalty': 'l1', 'solver': 'liblinear'}\n", + " 0%| | 3/100000 [00:00<7:26:29, 3.73trial/s, best loss: inf]2024-10-21 14:56:16,284 - build_posterior_wrapper took 0.023040 seconds\n", + "2024-10-21 14:56:16,285 - TPE using 3/3 trials with best loss inf\n", + " 0%| | 4/100000 [00:02<20:47:01, 1.34trial/s, best loss: -0.9407114624505929]2024-10-21 14:56:17,770 - build_posterior_wrapper took 0.024544 seconds\n", + "2024-10-21 14:56:17,772 - TPE using 4/4 trials with best loss -0.940711\n", + " 0%| | 5/100000 [00:03<29:26:25, 1.06s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:19,378 - build_posterior_wrapper took 0.021000 seconds\n", + "2024-10-21 14:56:19,380 - TPE using 5/5 trials with best loss -0.970356\n", + "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. 
Exception on ((/n_eigen_basis_{'low_rank_approximation': False, 'rank_regularization': 'explained_dispersion', 'window_size': 30};)/n_channel_filtration_{'centroid_metric': 'chebyshev', 'distance': 'manhattan', 'sample_metric': 'cityblock', 'selection_strategy': 'sum'};;/n_quantile_extractor_{'add_global_features': False, 'stride': 8, 'window_size': 5};)/n_logit_{'C': 0.14323116482051435, 'penalty': 'l2', 'solver': 'liblinear'}\n", + " 0%| | 6/100000 [00:04<23:34:19, 1.18trial/s, best loss: -0.9703557312252965]2024-10-21 14:56:19,818 - build_posterior_wrapper took 0.022002 seconds\n", + "2024-10-21 14:56:19,820 - TPE using 6/6 trials with best loss -0.970356\n", + " 0%| | 7/100000 [00:05<30:12:27, 1.09s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:21,398 - build_posterior_wrapper took 0.021999 seconds\n", + "2024-10-21 14:56:21,399 - TPE using 7/7 trials with best loss -0.970356\n", + "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. 
Exception on ((/n_eigen_basis_{'low_rank_approximation': False, 'rank_regularization': 'hard_thresholding', 'window_size': 20};)/n_channel_filtration_{'centroid_metric': 'chebyshev', 'distance': 'euclidean', 'sample_metric': 'cosine', 'selection_strategy': 'sum'};;/n_quantile_extractor_{'add_global_features': True, 'stride': 5, 'window_size': 15};)/n_logit_{'C': 3.8955607183989067, 'penalty': 'l1', 'solver': 'liblinear'}\n", + " 0%| | 8/100000 [00:06<22:28:26, 1.24trial/s, best loss: -0.9703557312252965]2024-10-21 14:56:21,608 - build_posterior_wrapper took 0.019989 seconds\n", + "2024-10-21 14:56:21,611 - TPE using 8/8 trials with best loss -0.970356\n", + " 0%| | 9/100000 [00:08<35:31:26, 1.28s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:23,922 - build_posterior_wrapper took 0.021008 seconds\n", + "2024-10-21 14:56:23,924 - TPE using 9/9 trials with best loss -0.970356\n", + " 0%| | 10/100000 [00:09<37:10:19, 1.34s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:25,393 - build_posterior_wrapper took 0.021008 seconds\n", + "2024-10-21 14:56:25,394 - TPE using 10/10 trials with best loss -0.970356\n", + " 0%| | 11/100000 [00:11<37:17:59, 1.34s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:26,750 - build_posterior_wrapper took 0.024029 seconds\n", + "2024-10-21 14:56:26,752 - TPE using 11/11 trials with best loss -0.970356\n", + " 0%| | 12/100000 [00:13<41:09:11, 1.48s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:28,551 - build_posterior_wrapper took 0.024999 seconds\n", + "2024-10-21 14:56:28,553 - TPE using 12/12 trials with best loss -0.970356\n", + " 0%| | 13/100000 [00:14<42:03:20, 1.51s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:30,134 - build_posterior_wrapper took 0.020999 seconds\n", + "2024-10-21 14:56:30,137 - TPE using 13/13 trials with best loss -0.970356\n", + " 0%| | 14/100000 [00:16<42:22:52, 1.53s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:31,687 - build_posterior_wrapper took 0.020999 
seconds\n", + "2024-10-21 14:56:31,689 - TPE using 14/14 trials with best loss -0.970356\n", + "PipelineObjectiveEvaluate - Unsuccessful pipeline fit during fitness evaluation. Skipping the pipeline. Exception on ((/n_eigen_basis_{'low_rank_approximation': False, 'rank_regularization': 'hard_thresholding', 'window_size': 20};)/n_channel_filtration_{'centroid_metric': 'chebyshev', 'distance': 'euclidean', 'sample_metric': 'minkowski', 'selection_strategy': 'sum'};;/n_quantile_extractor_{'add_global_features': True, 'stride': 5, 'window_size': 20};)/n_logit_{'C': 1.319640495646339, 'penalty': 'l1', 'solver': 'liblinear'}\n", + " 0%| | 15/100000 [00:16<31:23:21, 1.13s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:31,900 - build_posterior_wrapper took 0.020999 seconds\n", + "2024-10-21 14:56:31,901 - TPE using 15/15 trials with best loss -0.970356\n", + " 0%| | 16/100000 [00:17<34:56:30, 1.26s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:33,457 - build_posterior_wrapper took 0.023038 seconds\n", + "2024-10-21 14:56:33,459 - TPE using 16/16 trials with best loss -0.970356\n", + " 0%| | 17/100000 [00:19<37:51:41, 1.36s/trial, best loss: -0.9703557312252965]2024-10-21 14:56:35,068 - build_posterior_wrapper took 0.024730 seconds\n", + "2024-10-21 14:56:35,070 - TPE using 17/17 trials with best loss -0.970356\n", + " 0%| | 18/100000 [00:21<32:49:18, 1.18s/trial, best loss: -0.9703557312252965]\n", + "SimultaneousTuner - Hyperparameters optimization finished\n", + "SimultaneousTuner - Return tuned graph due to the fact that obtained metric 0.970 equal or better than initial (+ 0.05% deviation) 0.956\n", + "SimultaneousTuner - Final graph: {'depth': 3, 'length': 4, 'nodes': [logit, quantile_extractor, channel_filtration, eigen_basis]}\n", + "logit - {'C': 9.823134236772418, 'penalty': 'l2', 'solver': 'liblinear'}\n", + "quantile_extractor - {'add_global_features': False, 'stride': 2, 'window_size': 5}\n", + "channel_filtration - {'centroid_metric': 
'chebyshev', 'distance': 'euclidean', 'sample_metric': 'euclidean', 'selection_strategy': 'sum'}\n", + "eigen_basis - {'low_rank_approximation': True, 'rank_regularization': 'hard_thresholding', 'window_size': 10}\n", + "SimultaneousTuner - Final metric: 0.970\n", + "ApiComposer - Hyperparameters tuning finished\n", + "ApiComposer - Model generation finished\n", + "FEDOT logger - Final pipeline was fitted\n", + "FEDOT logger - Final pipeline: {'depth': 3, 'length': 4, 'nodes': [logit, quantile_extractor, channel_filtration, eigen_basis]}\n", + "logit - {'C': 9.823134236772418, 'penalty': 'l2', 'solver': 'liblinear'}\n", + "quantile_extractor - {'add_global_features': False, 'stride': 2, 'window_size': 5}\n", + "channel_filtration - {'centroid_metric': 'chebyshev', 'distance': 'euclidean', 'sample_metric': 'euclidean', 'selection_strategy': 'sum'}\n", + "eigen_basis - {'low_rank_approximation': True, 'rank_regularization': 'hard_thresholding', 'window_size': 10}\n", + "MemoryAnalytics - Memory consumption for finish in main session: current 34.3 MiB, max: 44.1 MiB\n", + "FEDOT logger - Predictions was saved in current directory.\n", + "FEDOT logger - Predictions was saved in current directory.\n" + ] + } + ], + "source": [ + "result_dict = ApiTemplate(api_config=api_config,\n", + " metric_list=metric_names).eval(dataset='ItalyPowerDemand',\n", + " finetune=finetune,\n", + " initial_assumption = node_list_model)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " accuracy f1 precision\n", + "0 0.963 0.963 0.963\n" + ] + } + ], + "source": [ + "print(result_dict['metrics'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Transform initial row in feature vector. 
Hard dataset" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "stat_pipeline = pipeline_creator.create_pipeline(node_list_model)\n", + "feature_extractor = pipeline_creator.create_pipeline(['quantile_extractor'])\n", + "feature_matrix = feature_extractor.fit(hard_to_clf_uno_dataset[0])\n", + "initial_ts, transformed_ts = pd.DataFrame(feature_matrix.features.squeeze()),pd.DataFrame(feature_matrix.predict.squeeze())\n", + "transformed_ts['target'] = feature_matrix.target" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "data": { + "text/plain": " 0 1 2 3 4 5 6 \\\n0 5.966985 38.522001 3.0 -0.001364 0.300924 0.000000 0.989583 \n1 2.726445 5.845252 30.0 0.010189 0.098683 0.022019 0.989583 \n2 0.873085 -0.957327 16.0 0.000105 0.094746 1.850185 0.989583 \n3 0.963595 -0.921340 7.0 0.003412 0.220262 1.687961 0.989583 \n4 0.710418 -0.989141 28.0 0.000204 0.061140 1.739863 0.989583 \n... ... ... ... ... ... ... ... \n8921 2.164995 2.743942 12.0 -0.003806 0.110030 0.000000 0.989583 \n8922 -1.239693 -0.473465 22.0 -0.000775 0.125696 0.000000 0.989583 \n8923 5.932263 36.119172 43.0 0.010063 0.060494 0.294614 0.989583 \n8924 6.315938 43.871301 3.0 0.002808 0.873119 0.000000 0.989583 \n8925 0.645619 -0.675459 16.0 0.010057 -0.061551 1.836893 0.989583 \n\n 7 8 9 ... 19 20 21 \\\n0 0.041667 0.375589 0.828100 ... -4.437500e-09 -0.213951 0.994778 \n1 0.041667 0.801460 4.898707 ... 7.291667e-10 -0.342460 0.994778 \n2 0.343750 0.174343 2.144298 ... 1.041667e-09 -0.713092 0.994778 \n3 0.156250 0.688291 1.923668 ... 3.437500e-09 -0.681677 0.994778 \n4 0.583333 -0.366485 3.064565 ... 1.979166e-10 -0.461327 0.994778 \n... ... ... ... ... ... ... ... \n8921 0.260417 -0.156627 0.572108 ... 
4.166668e-10 -0.393694 0.994778 \n8922 0.479167 -0.315068 0.794347 ... 9.791667e-09 0.558378 0.994778 \n8923 0.020833 0.661946 1.267773 ... 1.625000e-09 -0.036827 0.994778 \n8924 0.041667 0.048379 0.333216 ... -4.625929e-18 -0.186165 0.994778 \n8925 0.187500 0.902991 3.320024 ... 3.802083e-09 -0.433040 0.994778 \n\n 22 23 24 25 26 27 target \n0 7.513700 -0.213951 -0.213951 -0.213951 -0.213951 0.328340 6.0 \n1 3.750432 -0.364030 -0.360435 -0.353245 -0.331226 2.910553 4.0 \n2 2.164974 -0.713092 -0.713092 -0.713092 1.137093 1.753821 2.0 \n3 1.915187 -0.681677 -0.681677 -0.681677 1.006285 1.742063 2.0 \n4 2.148468 -0.856751 -0.856751 -0.856751 0.883112 1.832129 2.0 \n... ... ... ... ... ... ... ... \n8921 2.513585 -0.393694 -0.393694 -0.393694 -0.393694 2.513585 5.0 \n8922 0.558378 -1.772245 -1.772245 0.558378 0.558378 0.558378 5.0 \n8923 6.444689 -0.331441 -0.331441 -0.331441 -0.036827 -0.036827 5.0 \n8924 7.870049 -0.186165 -0.186165 -0.186165 -0.186165 -0.186165 3.0 \n8925 2.854033 -0.916433 -0.916433 -0.916433 0.920461 1.524702 7.0 \n\n[8926 rows x 29 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0123456789...192021222324252627target
05.96698538.5220013.0-0.0013640.3009240.0000000.9895830.0416670.3755890.828100...-4.437500e-09-0.2139510.9947787.513700-0.213951-0.213951-0.213951-0.2139510.3283406.0
12.7264455.84525230.00.0101890.0986830.0220190.9895830.0416670.8014604.898707...7.291667e-10-0.3424600.9947783.750432-0.364030-0.360435-0.353245-0.3312262.9105534.0
20.873085-0.95732716.00.0001050.0947461.8501850.9895830.3437500.1743432.144298...1.041667e-09-0.7130920.9947782.164974-0.713092-0.713092-0.7130921.1370931.7538212.0
30.963595-0.9213407.00.0034120.2202621.6879610.9895830.1562500.6882911.923668...3.437500e-09-0.6816770.9947781.915187-0.681677-0.681677-0.6816771.0062851.7420632.0
40.710418-0.98914128.00.0002040.0611401.7398630.9895830.583333-0.3664853.064565...1.979166e-10-0.4613270.9947782.148468-0.856751-0.856751-0.8567510.8831121.8321292.0
..................................................................
89212.1649952.74394212.0-0.0038060.1100300.0000000.9895830.260417-0.1566270.572108...4.166668e-10-0.3936940.9947782.513585-0.393694-0.393694-0.393694-0.3936942.5135855.0
8922-1.239693-0.47346522.0-0.0007750.1256960.0000000.9895830.479167-0.3150680.794347...9.791667e-090.5583780.9947780.558378-1.772245-1.7722450.5583780.5583780.5583785.0
89235.93226336.11917243.00.0100630.0604940.2946140.9895830.0208330.6619461.267773...1.625000e-09-0.0368270.9947786.444689-0.331441-0.331441-0.331441-0.036827-0.0368275.0
89246.31593843.8713013.00.0028080.8731190.0000000.9895830.0416670.0483790.333216...-4.625929e-18-0.1861650.9947787.870049-0.186165-0.186165-0.186165-0.186165-0.1861653.0
89250.645619-0.67545916.00.010057-0.0615511.8368930.9895830.1875000.9029913.320024...3.802083e-09-0.4330400.9947782.854033-0.916433-0.916433-0.9164330.9204611.5247027.0
\n

8926 rows × 29 columns

\n
" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformed_ts" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [ + { + "data": { + "text/plain": " 0 1 2 3 4 5 6 \\\ntarget \n1.0 5.860285 35.044228 2.0 0.010183 0.859622 0.000000 0.989583 \n2.0 0.873085 -0.957327 16.0 0.000105 0.094746 1.850185 0.989583 \n3.0 4.781473 22.104585 5.0 0.002226 0.289339 0.000000 0.989583 \n4.0 2.726445 5.845252 30.0 0.010189 0.098683 0.022019 0.989583 \n5.0 1.309320 -0.292210 21.0 0.001156 0.110845 0.000000 0.989583 \n6.0 5.966985 38.522001 3.0 -0.001364 0.300924 0.000000 0.989583 \n7.0 5.442467 34.217175 5.0 0.002869 0.310618 0.000000 0.989583 \n\n 7 8 9 ... 18 19 20 \\\ntarget ... \n1.0 0.041667 0.321436 0.614287 ... 0.885914 2.479167e-09 -0.196559 \n2.0 0.343750 0.174343 2.144298 ... 0.695167 1.041667e-09 -0.713092 \n3.0 0.083333 -0.038430 0.498850 ... 0.788571 -2.166667e-09 -0.215854 \n4.0 0.041667 0.801460 4.898707 ... 0.591460 7.291667e-10 -0.342460 \n5.0 0.447917 -0.297297 0.776556 ... 0.670729 -1.250000e-09 -0.542402 \n6.0 0.041667 0.375589 0.828100 ... 0.806000 -4.437500e-09 -0.213951 \n7.0 0.020833 0.060477 0.663817 ... 0.806000 1.145833e-09 -0.241564 \n\n 21 22 23 24 25 26 27 \ntarget \n1.0 0.994778 6.413422 -0.196559 -0.196559 -0.196559 -0.196559 0.075523 \n2.0 0.994778 2.164974 -0.713092 -0.713092 -0.713092 1.137093 1.753821 \n3.0 0.994778 5.694767 -0.215854 -0.215854 -0.215854 -0.215854 0.131830 \n4.0 0.994778 3.750432 -0.364030 -0.360435 -0.353245 -0.331226 2.910553 \n5.0 0.994778 1.824445 -0.542402 -0.542402 -0.542402 -0.542402 1.824445 \n6.0 0.994778 7.513700 -0.213951 -0.213951 -0.213951 -0.213951 0.328340 \n7.0 0.994778 7.416788 -0.241564 -0.241564 -0.241564 -0.241564 1.253535 \n\n[7 rows x 28 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
0123456789...18192021222324252627
target
1.05.86028535.0442282.00.0101830.8596220.0000000.9895830.0416670.3214360.614287...0.8859142.479167e-09-0.1965590.9947786.413422-0.196559-0.196559-0.196559-0.1965590.075523
2.00.873085-0.95732716.00.0001050.0947461.8501850.9895830.3437500.1743432.144298...0.6951671.041667e-09-0.7130920.9947782.164974-0.713092-0.713092-0.7130921.1370931.753821
3.04.78147322.1045855.00.0022260.2893390.0000000.9895830.083333-0.0384300.498850...0.788571-2.166667e-09-0.2158540.9947785.694767-0.215854-0.215854-0.215854-0.2158540.131830
4.02.7264455.84525230.00.0101890.0986830.0220190.9895830.0416670.8014604.898707...0.5914607.291667e-10-0.3424600.9947783.750432-0.364030-0.360435-0.353245-0.3312262.910553
5.01.309320-0.29221021.00.0011560.1108450.0000000.9895830.447917-0.2972970.776556...0.670729-1.250000e-09-0.5424020.9947781.824445-0.542402-0.542402-0.542402-0.5424021.824445
6.05.96698538.5220013.0-0.0013640.3009240.0000000.9895830.0416670.3755890.828100...0.806000-4.437500e-09-0.2139510.9947787.513700-0.213951-0.213951-0.213951-0.2139510.328340
7.05.44246734.2171755.00.0028690.3106180.0000000.9895830.0208330.0604770.663817...0.8060001.145833e-09-0.2415640.9947787.416788-0.241564-0.241564-0.241564-0.2415641.253535
\n

7 rows × 28 columns

\n
" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformed_ts.groupby(by='target').first()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-10-21 14:56:55,596 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\Phoneme\n", + "2024-10-21 14:56:56,087 - Data read successfully from local folder\n", + "2024-10-21 14:56:56,088 - Initialising experiment setup\n", + "2024-10-21 14:56:56,095 - -------------------------------------------------\n", + "2024-10-21 14:56:56,096 - Initialising Industrial Repository\n", + "2024-10-21 14:56:56,097 - -------------------------------------------------\n", + "2024-10-21 14:56:56,097 - Initialising Dask Server\n", + "Creating Dask Server\n", + "2024-10-21 14:56:56,104 - State start\n", + "2024-10-21 14:56:56,114 - Scheduler at: inproc://10.64.4.172/21832/14\n", + "2024-10-21 14:56:56,115 - dashboard at: http://10.64.4.172:58755/status\n", + "2024-10-21 14:56:56,116 - Registering Worker plugin shuffle\n", + "2024-10-21 14:56:56,128 - Start worker at: inproc://10.64.4.172/21832/17\n", + "2024-10-21 14:56:56,129 - Listening to: inproc10.64.4.172\n", + "2024-10-21 14:56:56,130 - Worker name: 0\n", + "2024-10-21 14:56:56,130 - dashboard at: 10.64.4.172:58756\n", + "2024-10-21 14:56:56,131 - Waiting to connect to: inproc://10.64.4.172/21832/14\n", + "2024-10-21 14:56:56,132 - -------------------------------------------------\n", + "2024-10-21 14:56:56,132 - Threads: 8\n", + "2024-10-21 14:56:56,133 - Memory: 31.95 GiB\n", + "2024-10-21 14:56:56,133 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-w9ppu209\n", + "2024-10-21 14:56:56,134 - -------------------------------------------------\n", + "2024-10-21 14:56:56,137 - Register worker \n", + 
"2024-10-21 14:56:56,139 - Starting worker compute stream, inproc://10.64.4.172/21832/17\n", + "2024-10-21 14:56:56,139 - Starting established connection to inproc://10.64.4.172/21832/18\n", + "2024-10-21 14:56:56,140 - Starting Worker plugin shuffle\n", + "2024-10-21 14:56:56,142 - Registered to: inproc://10.64.4.172/21832/14\n", + "2024-10-21 14:56:56,143 - -------------------------------------------------\n", + "2024-10-21 14:56:56,145 - Starting established connection to inproc://10.64.4.172/21832/14\n", + "2024-10-21 14:56:56,148 - Receive client connection: Client-91de19b7-8fa3-11ef-9548-b42e99a00ea1\n", + "2024-10-21 14:56:56,150 - Starting established connection to inproc://10.64.4.172/21832/19\n", + "2024-10-21 14:56:56,152 - LinK Dask Server - http://10.64.4.172:58755/status\n", + "2024-10-21 14:56:56,152 - -------------------------------------------------\n", + "2024-10-21 14:56:56,153 - Initialising solver\n", + "AssumptionsHandler - Initial pipeline fitting started\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 5.2 MiB, max: 6.8 MiB\n", + "ApiComposer - Initial pipeline was fitted in 2.2 sec.\n", + "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", + "ApiComposer - Pipeline composition started.\n", + "DataSourceSplitter - Hold out validation is applied.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [00:00']\n", + "GroupedCondition - Optimisation stopped: Time limit is reached\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [03:39']\n", + "IndustrialEvoOptimizer - no improvements for 1 iterations\n", + "IndustrialEvoOptimizer - spent time: 3.7 min\n", + "GPComposer - GP composition finished\n", + "DataSourceSplitter - Hold out validation is applied.\n", + "ApiComposer - Time for pipeline composing was 0:03:39.954146.\n", + "The remaining 2.7 seconds are not enough to tune the hyperparameters.\n", + "ApiComposer - Composed pipeline returned without tuning.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ApiComposer - Model generation finished\n", + "FEDOT logger - Final pipeline was fitted\n", + "FEDOT logger - Final pipeline: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", + "logit - {'C': 2.5395989642679324, 'penalty': 'l1', 'solver': 'liblinear'}\n", + "quantile_extractor - {'window_size': 5, 'stride': 3, 'add_global_features': False}\n", + "MemoryAnalytics - Memory consumption for finish in main session: current 68.0 MiB, max: 97.6 MiB\n", + "FEDOT logger - Predictions was saved in current directory.\n", + "FEDOT logger - Predictions was saved in current directory.\n" + ] + 
} + ], + "source": [ + "result_dict = ApiTemplate(api_config=api_config,\n", + " metric_list=metric_names).eval(dataset='Phoneme',\n", + " finetune=finetune,\n", + " initial_assumption = node_list_model)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " accuracy f1 precision\n", + "0 0.126 0.119 0.069\n" + ] + } + ], + "source": [ + "print(result_dict['metrics'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Классификация с помощью частотных преобразований" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "EMG = 'NerveDamage'\n", + "EEG = 'MotorImagery'\n", + "fourier_model = ['fourier_basis', 'quantile_extractor', 'rf']\n", + "wavelet_model = ['wavelet_basis', 'quantile_extractor', 'rf']\n", + "stat_model = ['quantile_extractor', 'rf']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "emg_dataset = pipeline_creator.create_input_data(EMG)\n", + "eeg_dataset = pipeline_creator.create_input_data(EEG)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### Fourier Hyperparams" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "fourier_params = {'threshold': {'hyperopt-dist': hp.choice, 'sampling-scope': 
[list(np.arange(0.75, 0.99, 0.05))]},\n", + " 'low_rank': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 30, 3)]]},\n", + " 'approximation': {'hyperopt-dist': hp.choice, 'sampling-scope': [['smooth', 'exact']]},\n", + " 'output_format': {'hyperopt-dist': hp.choice, 'sampling-scope': [['signal', 'spectrum']]}\n", + " }" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "stat_params = {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]},\n", + " 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]},\n", + " 'add_global_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "wavelet_params = {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]},\n", + " 'wavelet': {'hyperopt-dist': hp.choice,\n", + " 'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}}\n", + "discrete_wav = DISCRETE_WAVELETS\n", + "cont_wat = CONTINUOUS_WAVELETS" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_mean_sample(emg_dataset[0].features,emg_dataset[0].target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 61, + "outputs": [], + "source": [ + "threshold = 0.9\n", + "output_format = 'signal'\n", + "approximation = 'smooth'\n", + "low_rank = 10" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 62, + "outputs": [], + "source": [ + "estimator = SPECTRUM_ESTIMATORS['eigen']\n", + "wavelet = 'gaus8'\n", + "n_components = 5\n", + "low_freq = True" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 63, + "outputs": [], + "source": [ + "fourier_node_dict = {'fourier_basis':{'threshold':threshold,\n", + " 'approximation':approximation,\n", + " 'low_rank':low_rank}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 64, + "outputs": [], + "source": [ + "wavelet_node_dict = {'wavelet_basis':{'wavelet':wavelet,\n", + " 'n_components':n_components,\n", + " 'low_freq':low_freq}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 65, + "outputs": [], + "source": [ + "feature_extractor = pipeline_creator.create_pipeline(fourier_node_dict)\n", + "feature_matrix = feature_extractor.fit(emg_dataset[0])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 66, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_mean_sample(feature_matrix.predict, feature_matrix.target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 50, + "outputs": [], + "source": [ + "feature_extractor = pipeline_creator.create_pipeline(wavelet_node_dict)\n", + "feature_matrix = feature_extractor.fit(emg_dataset[0])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 51, + "outputs": [], + "source": [ + "n_channels = list(range(feature_matrix.predict.shape[1]))" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 52, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for channel in n_channels:\n", + " y = np.expand_dims(feature_matrix.predict[:,channel,:], axis=1)\n", + " plot_mean_sample(y, feature_matrix.target)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [], + "source": [ + "stat_list_model = {'quantile_extractor':{'window_size':10,\n", + " 'add_global_features':True,\n", + " 'use_sliding_window':False},\n", + " 'logit':{}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 35, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-10-21 15:03:35,270 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\NerveDamage\n", + "2024-10-21 15:03:35,523 - Data read successfully from local folder\n", + "2024-10-21 15:03:35,527 - Initialising experiment setup\n", + "2024-10-21 15:03:35,549 - -------------------------------------------------\n", + "2024-10-21 15:03:35,550 - Initialising Industrial Repository\n", + "2024-10-21 15:03:35,551 - -------------------------------------------------\n", + "2024-10-21 15:03:35,551 - Initialising Dask Server\n", + "Creating Dask Server\n", + "2024-10-21 15:03:35,558 - State start\n", + "2024-10-21 15:03:35,568 - Scheduler at: inproc://10.64.4.172/21832/53\n", + "2024-10-21 15:03:35,569 - dashboard at: http://10.64.4.172:59133/status\n", + "2024-10-21 15:03:35,569 - Registering Worker plugin shuffle\n", + "2024-10-21 15:03:35,581 - Start worker at: inproc://10.64.4.172/21832/56\n", + "2024-10-21 15:03:35,582 - Listening to: inproc10.64.4.172\n", + "2024-10-21 15:03:35,583 - Worker name: 0\n", + "2024-10-21 15:03:35,583 - dashboard at: 10.64.4.172:59134\n", + "2024-10-21 15:03:35,584 - Waiting to connect to: inproc://10.64.4.172/21832/53\n", + 
"2024-10-21 15:03:35,584 - -------------------------------------------------\n", + "2024-10-21 15:03:35,584 - Threads: 8\n", + "2024-10-21 15:03:35,585 - Memory: 31.95 GiB\n", + "2024-10-21 15:03:35,586 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-suxwo040\n", + "2024-10-21 15:03:35,586 - -------------------------------------------------\n", + "2024-10-21 15:03:35,589 - Register worker \n", + "2024-10-21 15:03:35,590 - Starting worker compute stream, inproc://10.64.4.172/21832/56\n", + "2024-10-21 15:03:35,591 - Starting established connection to inproc://10.64.4.172/21832/57\n", + "2024-10-21 15:03:35,592 - Starting Worker plugin shuffle\n", + "2024-10-21 15:03:35,593 - Registered to: inproc://10.64.4.172/21832/53\n", + "2024-10-21 15:03:35,593 - -------------------------------------------------\n", + "2024-10-21 15:03:35,594 - Starting established connection to inproc://10.64.4.172/21832/53\n", + "2024-10-21 15:03:35,598 - Receive client connection: Client-7ff51df7-8fa4-11ef-9548-b42e99a00ea1\n", + "2024-10-21 15:03:35,599 - Starting established connection to inproc://10.64.4.172/21832/58\n", + "2024-10-21 15:03:35,600 - LinK Dask Server - http://10.64.4.172:59133/status\n", + "2024-10-21 15:03:35,601 - -------------------------------------------------\n", + "2024-10-21 15:03:35,602 - Initialising solver\n", + "AssumptionsHandler - Initial pipeline fitting started\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 4.4 MiB, max: 6.3 MiB\n", + "ApiComposer - Initial pipeline was fitted in 2.3 sec.\n", + "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", + "ApiComposer - Pipeline composition started.\n", + "DataSourceSplitter - K-folds cross validation is applied.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [00:00.on_destroy at 0x000001C611AF2430>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950244182032\n", + "Exception ignored in: .on_destroy at 0x000001C6151208B0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950267027344\n", + "Exception ignored in: .on_destroy at 0x000001C5FF3F4F70>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950202730416\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-10-21 15:04:53,169 - full garbage collection released 19.01 MiB from 36669 reference cycles (threshold: 9.54 MiB)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception ignored in: .on_destroy at 0x000001C616E32550>\n", + "Traceback (most recent call last):\n", + " File 
\"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950046001840\n", + "Exception ignored in: .on_destroy at 0x000001C6143D1EE0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1949910308496\n", + "Exception ignored in: .on_destroy at 0x000001C605C3E820>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950165793360\n", + "Exception ignored in: .on_destroy at 0x000001C612653670>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950223646032\n", + "Exception ignored in: .on_destroy at 0x000001C6151D6790>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950250361360\n", + "Exception ignored in: .on_destroy at 0x000001C61698E3A0>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950003899056\n", + "Exception ignored in: .on_destroy at 0x000001C612556790>\n", + "Traceback (most recent call last):\n", + " File 
\"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950193804368\n", + "Exception ignored in: .on_destroy at 0x000001C614E36160>\n", + "Traceback (most recent call last):\n", + " File \"C:\\Users\\user\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\fedot-ind-bTwQVkVM-py3.9\\lib\\site-packages\\joblib\\_dask.py\", line 87, in on_destroy\n", + " del self._data[key]\n", + "KeyError: 1950009270992\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "IndustrialDispatcher - 1 individuals out of 13 in previous population were evaluated successfully. 0.07692307692307693% is a fairly small percentage of successful evaluation.\n", + "IndustrialEvoOptimizer - Generation num: 1 size: 1\n", + "IndustrialEvoOptimizer - Best individuals: HallOfFame archive fitness (1): ['']\n", + "GroupedCondition - Optimisation stopped: Time limit is reached\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Generations: 0%| | 0/10000 [05:17']\n", + "IndustrialEvoOptimizer - no improvements for 1 iterations\n", + "IndustrialEvoOptimizer - spent time: 5.3 min\n", + "GPComposer - GP composition finished\n", + "DataSourceSplitter - K-folds cross validation is applied.\n", + "ApiComposer - Time for pipeline composing was 0:05:17.341698.\n", + "The remaining 4.3 seconds are not enough to tune the hyperparameters.\n", + "ApiComposer - Composed pipeline returned without tuning.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ApiComposer - Model generation finished\n", + "FEDOT logger - Final pipeline was fitted\n", + "FEDOT logger - Final pipeline: {'depth': 2, 'length': 2, 'nodes': [logit, quantile_extractor]}\n", + "logit - {'C': 5.243806353645889, 'penalty': 'l1', 'solver': 'liblinear'}\n", + 
"quantile_extractor - {'window_size': 25, 'stride': 9, 'add_global_features': True}\n", + "MemoryAnalytics - Memory consumption for finish in main session: current 87.6 MiB, max: 96.2 MiB\n", + "FEDOT logger - Predictions was saved in current directory.\n", + "FEDOT logger - Predictions was saved in current directory.\n" + ] + } + ], + "source": [ + "result_dict_stat = ApiTemplate(api_config=api_config,\n", + " metric_list=metric_names).eval(dataset='NerveDamage',\n", + " finetune=finetune,\n", + " initial_assumption = stat_list_model)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 36, + "outputs": [ + { + "data": { + "text/plain": " accuracy f1 precision\n0 1.0 1.0 1.0", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
accuracyf1precision
01.01.01.0
\n
" + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dict_stat['metrics']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 67, + "outputs": [], + "source": [ + "fourier_list_model = {'fourier_basis':{'threshold':threshold,\n", + " 'approximation':approximation,\n", + " 'low_rank':low_rank},\n", + " 'quantile_extractor':{'window_size':10,\n", + " 'add_global_features':True,\n", + " 'use_sliding_window':False},\n", + " 'logit':{}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 68, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating Dask Server\n", + "2024-10-21 19:03:41,872 - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy\n", + "2024-10-21 19:03:41,877 - State start\n", + "2024-10-21 19:03:41,887 - Scheduler at: inproc://10.64.4.172/26676/1\n", + "2024-10-21 19:03:41,888 - dashboard at: http://10.64.4.172:8787/status\n", + "2024-10-21 19:03:41,889 - Registering Worker plugin shuffle\n", + "2024-10-21 19:03:41,905 - Start worker at: inproc://10.64.4.172/26676/4\n", + "2024-10-21 19:03:41,905 - Listening to: inproc10.64.4.172\n", + "2024-10-21 19:03:41,906 - Worker name: 0\n", + "2024-10-21 19:03:41,907 - dashboard at: 10.64.4.172:55982\n", + "2024-10-21 19:03:41,907 - Waiting to connect to: inproc://10.64.4.172/26676/1\n", + "2024-10-21 19:03:41,908 - -------------------------------------------------\n", + "2024-10-21 19:03:41,908 - Threads: 8\n", + "2024-10-21 19:03:41,908 - Memory: 31.95 GiB\n", + "2024-10-21 19:03:41,909 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-olac703q\n", + "2024-10-21 19:03:41,910 - -------------------------------------------------\n", + "2024-10-21 
19:03:41,914 - Register worker \n", + "2024-10-21 19:03:41,916 - Starting worker compute stream, inproc://10.64.4.172/26676/4\n", + "2024-10-21 19:03:41,916 - Starting established connection to inproc://10.64.4.172/26676/5\n", + "2024-10-21 19:03:41,917 - Starting Worker plugin shuffle\n", + "2024-10-21 19:03:41,918 - Registered to: inproc://10.64.4.172/26676/1\n", + "2024-10-21 19:03:41,919 - -------------------------------------------------\n", + "2024-10-21 19:03:41,920 - Starting established connection to inproc://10.64.4.172/26676/1\n", + "2024-10-21 19:03:41,923 - Receive client connection: Client-0acbf6d7-8fc6-11ef-a834-b42e99a00ea1\n", + "2024-10-21 19:03:41,925 - Starting established connection to inproc://10.64.4.172/26676/6\n", + "AssumptionsHandler - Initial pipeline fitting started\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 4.6 MiB, max: 13.2 MiB\n", + "ApiComposer - Initial pipeline was fitted in 4.6 sec.\n", + "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", + "ApiComposer - Timeout is too small for composing and is skipped because fit_time is 4.602908 sec.\n", + "DataSourceSplitter - K-folds cross validation is applied.\n", + "ApiComposer - Hyperparameters tuning started with 1 min. 
timeout\n", + "SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial graph\n", + "SimultaneousTuner - Initial graph: {'depth': 3, 'length': 3, 'nodes': [logit, quantile_extractor, fourier_basis]}\n", + "logit - {}\n", + "quantile_extractor - {'window_size': 10, 'add_global_features': True, 'use_sliding_window': False}\n", + "fourier_basis - {'threshold': 0.9, 'approximation': 'smooth', 'low_rank': 10} \n", + "Initial metric: [0.485]\n", + " 0%| | 0/10 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
accuracyf1precision
00.5610.5760.512
\n" + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dict_fourier['metrics']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 40, + "outputs": [], + "source": [ + "wavelet_list_model = {'wavelet_basis':{'wavelet':wavelet,\n", + " 'n_components':n_components,\n", + " 'low_freq':low_freq},\n", + " 'quantile_extractor':{'window_size':10,\n", + " 'add_global_features':True,\n", + " 'use_sliding_window':False},\n", + " 'logit':{}}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 41, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-10-21 15:10:20,836 - Reading data from D:\\WORK\\Repo\\Industiral\\IndustrialTS\\fedot_ind\\data\\NerveDamage\n", + "2024-10-21 15:10:21,049 - Data read successfully from local folder\n", + "2024-10-21 15:10:21,054 - Initialising experiment setup\n", + "2024-10-21 15:10:21,080 - -------------------------------------------------\n", + "2024-10-21 15:10:21,084 - Initialising Industrial Repository\n", + "2024-10-21 15:10:21,085 - -------------------------------------------------\n", + "2024-10-21 15:10:21,086 - Initialising Dask Server\n", + "Creating Dask Server\n", + "2024-10-21 15:10:21,093 - State start\n", + "2024-10-21 15:10:21,103 - Scheduler at: inproc://10.64.4.172/21832/100\n", + "2024-10-21 15:10:21,103 - dashboard at: http://10.64.4.172:59476/status\n", + "2024-10-21 15:10:21,104 - Registering Worker plugin shuffle\n", + "2024-10-21 15:10:21,120 - Start worker at: inproc://10.64.4.172/21832/103\n", + "2024-10-21 15:10:21,121 - Listening to: inproc10.64.4.172\n", + "2024-10-21 15:10:21,122 - Worker name: 0\n", + "2024-10-21 15:10:21,122 - dashboard at: 10.64.4.172:59477\n", + "2024-10-21 15:10:21,122 - Waiting to connect to: inproc://10.64.4.172/21832/100\n", 
+ "2024-10-21 15:10:21,123 - -------------------------------------------------\n", + "2024-10-21 15:10:21,123 - Threads: 8\n", + "2024-10-21 15:10:21,124 - Memory: 31.95 GiB\n", + "2024-10-21 15:10:21,124 - Local Directory: C:\\Users\\user\\AppData\\Local\\Temp\\dask-scratch-space\\worker-15z07gyq\n", + "2024-10-21 15:10:21,124 - -------------------------------------------------\n", + "2024-10-21 15:10:21,129 - Register worker \n", + "2024-10-21 15:10:21,131 - Starting worker compute stream, inproc://10.64.4.172/21832/103\n", + "2024-10-21 15:10:21,132 - Starting established connection to inproc://10.64.4.172/21832/104\n", + "2024-10-21 15:10:21,132 - Starting Worker plugin shuffle\n", + "2024-10-21 15:10:21,134 - Registered to: inproc://10.64.4.172/21832/100\n", + "2024-10-21 15:10:21,134 - -------------------------------------------------\n", + "2024-10-21 15:10:21,135 - Starting established connection to inproc://10.64.4.172/21832/100\n", + "2024-10-21 15:10:21,139 - Receive client connection: Client-71adcc2f-8fa5-11ef-9548-b42e99a00ea1\n", + "2024-10-21 15:10:21,141 - Starting established connection to inproc://10.64.4.172/21832/105\n", + "2024-10-21 15:10:21,142 - LinK Dask Server - http://10.64.4.172:59476/status\n", + "2024-10-21 15:10:21,147 - -------------------------------------------------\n", + "2024-10-21 15:10:21,148 - Initialising solver\n", + "AssumptionsHandler - Initial pipeline fitting started\n", + "AssumptionsHandler - Initial pipeline was fitted successfully\n", + "AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 4.5 MiB, max: 13.1 MiB\n", + "ApiComposer - Initial pipeline was fitted in 2.8 sec.\n", + "AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.\n", + "ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. 
Set of candidate models: ['xgboost', 'catboost', 'logit', 'dt', 'rf', 'mlp', 'lgbm', 'one_class_svm', 'inception_model', 'nbeats_model', 'tcn_model', 'deepar_model', 'channel_filtration', 'eigen_basis', 'wavelet_basis', 'fourier_basis', 'quantile_extractor', 'topological_extractor', 'minirocket_extractor', 'scaling', 'normalization', 'simple_imputation', 'kernel_pca', 'topological_extractor'].\n", + "ApiComposer - Timeout is too small for composing and is skipped because fit_time is 2.789956 sec.\n", + "DataSourceSplitter - K-folds cross validation is applied.\n", + "ApiComposer - Hyperparameters tuning started with 1 min. timeout\n", + "SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial graph\n", + "SimultaneousTuner - Initial graph: {'depth': 3, 'length': 3, 'nodes': [logit, quantile_extractor, wavelet_basis]}\n", + "logit - {}\n", + "quantile_extractor - {'window_size': 10, 'add_global_features': True, 'use_sliding_window': False}\n", + "wavelet_basis - {'wavelet': 'gaus8', 'n_components': 5, 'low_freq': True} \n", + "Initial metric: [0.957]\n", + " 0%| | 0/10 [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
accuracyf1precision
01.01.01.0
\n" + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dict_wavelet['metrics']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/tutorial/time_series/ts_classification/tmp.py b/examples/tutorial/time_series/ts_classification/tmp.py new file mode 100644 index 000000000..1f838cf4f --- /dev/null +++ b/examples/tutorial/time_series/ts_classification/tmp.py @@ -0,0 +1,100 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from hyperopt import hp + +from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline, ApiTemplate + + +def plot_mean_sample(X, y, labels: list = [], n_channel: int = 1): + mean_sample = [] + if len(labels) == 0: + labels = list(np.unique(y)) + for label in labels: + mean_sample.append(np.mean(X[y == label], axis=0)) # Данные класса 1 + # ax = plt.gca() + [f'Channel {x}' for x in range(n_channel)] + df = pd.DataFrame(mean_sample).T + df.columns = labels + df.plot(kind='line', subplots=True, layout=(1, len(labels)), figsize=(20, 10)) + plt.legend(fontsize='small') + plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) + plt.show() + + +# %% +def plot_mean_sample_multi(X, y, labels: list = [], n_channel: int = None): + mean_sample = {} + if len(labels) == 0: + labels = list(np.unique(y)) + if n_channel is None: + n_channel = X.shape[1] + [f'Channel {x}' for x in range(n_channel)] + for label in labels: + mask = y == label + for chn in 
range(n_channel): + mean_sample.update( + {f'Label_{label}_channel_{chn}': np.mean(X[mask.flatten(), chn, :], axis=0)}) # Данные класса 1 + # ax = plt.gca() + df = pd.DataFrame(mean_sample) + df.plot(kind='line') + plt.suptitle('Усреднённые семплы по классам') + plt.legend(fontsize='small') + plt.legend(loc='upper left', bbox_to_anchor=(1, 1)) + plt.show() + + +# %% md +# Topo Hyperparams +# %% +topological_params = {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, + 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}}, +# %% +stat_params = {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, + 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}, + 'add_global_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}} +# %% +recurrence_params = {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, + 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}, + 'rec_metric': (hp.choice, [['cosine', 'euclidean']]), + 'image_mode': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}, +# %% +rec_metric = 'cosine' +image_mode = True +window_size = 10 +stride = 1 +# %% +topological_node_dict = {'topological_extractor': {'window_size': window_size, + 'stride': stride}} +# %% +recurrence_node_dict = {'recurrence_extractor': {'window_size': window_size, + 'stride': stride, + 'rec_metric': rec_metric, + 'image_mode': image_mode}} + +finetune = False +metric_names = ('f1', 'accuracy', 'precision', 'roc_auc') +api_config = dict(problem='classification', + metric='accuracy', + timeout=1, + pop_size=20, + with_tuning=True, + with_tunig=False, + n_jobs=-1, + logging_level=20) +pipeline_creator = AbstractPipeline(task='classification') +ECG = 'Lightning7' +topological_model = ['topological_extractor', 'rf'] 
+recurrence_model = ['recurrence_extractor', 'quantile_extractor', 'rf'] +# %% +ecg_dataset = pipeline_creator.create_input_data(ECG) + +if __name__ == "__main__": + topo_list_model = { + 'topological_extractor': {'window_size': 10}, + 'logit': {}} + result_dict_topo = ApiTemplate(api_config=api_config, + metric_list=metric_names).eval(dataset=ECG, + finetune=finetune, + initial_assumption=topo_list_model) + _ = 1 diff --git a/fedot_ind/__init__.py b/fedot_ind/__init__.py index 7845c276a..e72d218bc 100644 --- a/fedot_ind/__init__.py +++ b/fedot_ind/__init__.py @@ -2,3 +2,4 @@ __all__ = ['fedot_api'] +__version__ = "0.5.0" diff --git a/fedot_ind/api/main.py b/fedot_ind/api/main.py index 4f35773b7..6b176f009 100644 --- a/fedot_ind/api/main.py +++ b/fedot_ind/api/main.py @@ -1,11 +1,13 @@ import os import warnings from copy import deepcopy +from functools import partial from typing import Union import numpy as np import pandas as pd from fedot.api.main import Fedot +from fedot.core.data.data import OutputData from fedot.core.pipelines.pipeline import Pipeline from fedot.core.visualisation.pipeline_specific_visuals import PipelineHistoryVisualizer from golem.core.optimisers.opt_history_objects.opt_history import OptHistory @@ -13,14 +15,20 @@ from sklearn import model_selection as skms from sklearn.calibration import CalibratedClassifierCV +import fedot_ind.core.repository.constanst_repository as CONST_REPO from fedot_ind.api.utils.api_init import ApiManager from fedot_ind.api.utils.checkers_collections import DataCheck from fedot_ind.core.architecture.abstraction.decorators import DaskServer -from fedot_ind.core.architecture.pipelines.classification import SklearnCompatibleClassifier +from fedot_ind.core.architecture.pipelines.classification import ( + SklearnCompatibleClassifier, +) from fedot_ind.core.architecture.preprocessing.data_convertor import ApiConverter -from fedot_ind.core.repository.constanst_repository import \ - FEDOT_GET_METRICS, FEDOT_TUNING_METRICS, 
\ - FEDOT_TUNER_STRATEGY +from fedot_ind.core.optimizer.FedotEvoOptimizer import FedotEvoOptimizer +from fedot_ind.core.repository.constanst_repository import ( + FEDOT_GET_METRICS, + FEDOT_TUNER_STRATEGY, + FEDOT_TUNING_METRICS, +) from fedot_ind.core.repository.industrial_implementations.abstract import build_tuner from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels @@ -28,11 +36,32 @@ class FedotIndustrial(Fedot): - """This class is used to run Fedot in industrial mode as FedotIndustrial. + """Main class for Industrial API. It provides a high-level interface for working with the + Fedot framework. The class allows you to train, predict, and evaluate models for time series. + All arguments are passed as keyword arguments and handled by the ApiManager class. Args: - input_config: dictionary with the parameters of the experiment. - output_folder: path to the folder where the results will be saved. + problem: str. The type of task to solve. Available options: 'ts_forecasting', 'ts_classification', 'ts_regression'. + timeout: int. Time for model design (in minutes): ``None`` or ``-1`` means infinite time. + logging_level: logging levels are the same as in + `built-in logging library `_. + + .. details:: Possible options: + + - ``50`` -> critical + - ``40`` -> error + - ``30`` -> warning + - ``20`` -> info + - ``10`` -> debug + - ``0`` -> nonset + backend_method: str. Default `cpu`. The method for backend. Available options: 'cpu', 'dask'. + initial_assumption: Pipeline = None. The initial pipeline for the model. + optimizer_params: dict = None. + task_params: dict = None. + strategy: str = None. + strategy_params: dict = None. + available_operations: list = None. + output_folder: str = './output'. 
Example: First, configure experiment and instantiate FedotIndustrial class:: @@ -42,7 +71,6 @@ class FedotIndustrial(Fedot): industrial = FedotIndustrial(problem='ts_classification', - use_cache=False, timeout=15, n_jobs=2, logging_level=20) @@ -62,42 +90,54 @@ class FedotIndustrial(Fedot): def __init__(self, **kwargs): super(Fedot, self).__init__() - self.api_controller = ApiManager(**kwargs) - self.config_dict = self.api_controller.config_dict - self.logger = self.api_controller.logger - self.industrial_strategy_class = self.api_controller.industrial_strategy_class - - def __init_solver(self): - self.logger.info(f'-------------------------------------------------') + self.manager = ApiManager(**kwargs) + self.config = self.manager.config + self.logger = self.manager.logger + self.strategy_cls = self.manager.strategy_class + self.solver = self.manager.solver + self.__init_industrial_backend() + + def __init_industrial_backend(self): + self.logger.info('-' * 50) self.logger.info('Initialising Industrial Repository') - if self.api_controller.is_default_fedot_context: + if self.manager.is_default_fedot_context: self.repo = IndustrialModels().setup_default_repository() - self.config_dict['optimizer'] = None + self.config['optimizer'] = FedotEvoOptimizer else: - self.repo = IndustrialModels().setup_repository() + self.repo = IndustrialModels().setup_repository(backend=self.manager.backend_method) + + def __init_evolution_optimisation_params(self): self.logger.info(f'-------------------------------------------------') + self.logger.info('Initialising Evolutionary Optimisation params') + if self.manager.optimizer_params is not None: + self.config['optimizer'] = partial(self.config['optimizer'], + optimisation_params=self.manager.optimizer_params) + + def __init_solver(self): + self.logger.info('-' * 50) self.logger.info('Initialising Dask Server') - self.config_dict['initial_assumption'] = self.config_dict['initial_assumption'].build() + self.config['initial_assumption'] 
= self.config['initial_assumption'].build() self.dask_client = DaskServer().client - self.logger.info(f'LinK Dask Server - {self.dask_client.dashboard_link}') - self.logger.info(f'-------------------------------------------------') + setattr(CONST_REPO, 'DASK_CLIENT', self.dask_client) + self.logger.info(f'Link Dask Server - {self.dask_client.dashboard_link}') + self.logger.info('-' * 50) self.logger.info('Initialising solver') - self.solver = Fedot(**self.config_dict) - # if self.api_controller.is_default_fedot_context: - # self.solver = self.api_controller._check_mutations(self.solver) + self.__init_industrial_backend() + self.__init_evolution_optimisation_params() + self.solver = Fedot(**self.config) def _process_input_data(self, input_data): train_data = deepcopy(input_data) # we do not want to make inplace changes input_preproc = DataCheck( input_data=train_data, - task=self.config_dict['problem'], - task_params=self.api_controller.task_params, + task=self.config['problem'], + task_params=self.manager.task_params, fit_stage=True, - industrial_task_params=self.api_controller.industrial_strategy_params) + industrial_task_params=self.manager) train_data = input_preproc.check_input_data() self.target_encoder = input_preproc.get_target_encoder() - train_data.features = train_data.features.squeeze() if self.api_controller.is_default_fedot_context \ + train_data.features = train_data.features.squeeze() if self.manager.is_default_fedot_context \ else train_data.features return train_data @@ -120,22 +160,26 @@ def __calibrate_probs(self, industrial_model): return calibrated_proba def __predict_for_ensemble(self): - predict = self.industrial_strategy_class.predict( + predict = self.strategy_cls.predict( self.predict_data, 'probs') - ensemble_strat = self.industrial_strategy_class.ensemble_strategy - predict = {strategy: np.argmax(self.industrial_strategy_class.ensemble_predictions(predict, strategy), axis=1) + ensemble_strat = self.strategy_cls.ensemble_strategy + 
predict = {strategy: np.argmax(self.strategy_cls.ensemble_predictions(predict, strategy), axis=1) for strategy in ensemble_strat} return predict def __abstract_predict(self, predict_mode): - have_encoder = self.api_controller.condition_check.solver_have_target_encoder(self.target_encoder) + have_encoder = self.manager.condition_check.solver_have_target_encoder(self.target_encoder) labels_output = predict_mode in ['labels'] - default_fedot_strategy = self.api_controller.industrial_strategy is None - custom_predict = self.solver.predict if default_fedot_strategy else self.industrial_strategy_class.predict + default_fedot_strategy = self.manager.industrial_strategy is None + custom_predict = self.solver.predict if default_fedot_strategy else self.manager.strategy_class.predict + have_proba_output = hasattr(self.solver, 'predict_proba') + self.__init_industrial_backend() + default_fedot_strategy = self.manager.strategy_class is None + custom_predict = self.solver.predict if default_fedot_strategy else self.strategy_cls.predict predict_function = Either(value=custom_predict, monoid=['prob', labels_output]).either( - left_function=lambda prob_func: self.solver.predict_proba, + left_function=lambda prob_func: self.solver.predict_proba if have_proba_output else self.solver.predict, right_function=lambda label_func: label_func) def _inverse_encoder_transform(predict): @@ -149,6 +193,12 @@ def _inverse_encoder_transform(predict): value=self.predict_data, monoid=[False, True]).then( function=lambda x: predict_function(x, predict_mode)).then( lambda x: _inverse_encoder_transform(x) if have_encoder else x).value + if isinstance(predict, OutputData): + predict = predict.predict + try: + predict = np.argmax(predict, axis=1) if predict.shape[1] != 1 else predict + except Exception: + predict = predict return predict def _metric_evaluation_loop(self, @@ -168,7 +218,7 @@ def _metric_evaluation_loop(self, in predicted_labels.items()} return metric_dict else: - if 
self.api_controller.condition_check.solver_have_target_encoder(self.target_encoder): + if self.manager.condition_check.solver_have_target_encoder(self.target_encoder): new_target = self.target_encoder.transform(target.flatten()) labels = self.target_encoder.transform(predicted_labels).reshape(valid_shape) else: @@ -192,15 +242,15 @@ def fit(self, **kwargs: additional parameters """ - custom_fit = all([self.api_controller.industrial_strategy is not None, - self.api_controller.industrial_strategy != 'anomaly_detection']) self.is_finetuned = False self.train_data = self._process_input_data(input_data) self.__init_solver() + Either(value=self.train_data, - monoid=[self.train_data, - custom_fit]).either(left_function=self.solver.fit, - right_function=self.industrial_strategy_class.fit) + monoid=[self.train_data, self.strategy_cls is None]).either( + left_function=lambda data: self.strategy_cls.fit(data), + right_function=self.solver.fit + ) def predict(self, predict_data: tuple, @@ -239,7 +289,7 @@ def predict_proba(self, """ self.predict_data = self._process_input_data(predict_data) - self.predicted_probs = self.predicted_labels if self.api_controller.is_regression_task_context \ + self.predicted_probs = self.predicted_labels if self.manager.is_regression_task_context \ else self.__abstract_predict(predict_mode) return self.__calibrate_probs(self.solver.current_pipeline) if calibrate_probs else self.predicted_probs @@ -261,19 +311,19 @@ def finetune(self, self.is_finetuned = True train_data = self._process_input_data(train_data) if \ - not self.api_controller.condition_check.input_data_is_fedot_type(train_data) else train_data + not self.manager.condition_check.input_data_is_fedot_type(train_data) else train_data if tuning_params is None: tuning_params = ApiConverter.tuning_params_is_none(tuning_params) - tuning_params['metric'] = FEDOT_TUNING_METRICS[self.config_dict['problem']] + tuning_params['metric'] = FEDOT_TUNING_METRICS[self.config['problem']] for tuner_name, 
tuner_type in FEDOT_TUNER_STRATEGY.items(): - if self.api_controller.condition_check.solver_is_fedot_class(self.solver): + if self.manager.condition_check.solver_is_fedot_class(self.solver): model_to_tune = deepcopy(self.solver.current_pipeline) - elif not self.api_controller.condition_check.solver_is_none(model_to_tune): + elif not self.manager.condition_check.solver_is_none(model_to_tune): model_to_tune = model_to_tune else: model_to_tune = deepcopy( - self.config_dict['initial_assumption']).build() + self.config['initial_assumption']).build() tuning_params['tuner'] = tuner_type pipeline_tuner, model_to_tune = build_tuner( self, model_to_tune, tuning_params, train_data, mode) @@ -283,7 +333,7 @@ def finetune(self, def get_metrics(self, target: Union[list, np.array] = None, - metric_names: tuple = ('f1', 'roc_auc', 'accuracy'), + metric_names: tuple = None, rounding_order: int = 3, **kwargs) -> pd.DataFrame: """ @@ -303,7 +353,8 @@ def get_metrics(self, pandas DataFrame with calculated metrics """ - problem = self.config_dict['problem'] + problem = self.config['problem'] + if problem == 'classification' and self.predicted_probs is None and 'roc_auc' in metric_names: self.logger.info('Predicted probabilities are not available. 
Use `predict_proba()` method first') if isinstance(self.predicted_probs, dict): @@ -375,16 +426,16 @@ def load(self, path): def save_optimization_history(self, return_history: bool = False): return self.solver.history if return_history else self.solver.history.save( - f"{self.api_controller.output_folder}/" + f"{self.manager.output_folder}/" f"optimization_history.json") def save_best_model(self): Either(value=self.solver, - monoid=[self.solver, self.api_controller.condition_check.solver_is_fedot_class(self.solver)]).either( - left_function=lambda pipeline: pipeline.save(path=self.api_controller.output_folder, + monoid=[self.solver, self.manager.condition_check.solver_is_fedot_class(self.solver)]).either( + left_function=lambda pipeline: pipeline.save(path=self.manager.output_folder, create_subdir=True, is_datetime_in_path=True), - right_function=lambda solver: solver.current_pipeline.save(path=self.api_controller.output_folder, + right_function=lambda solver: solver.current_pipeline.save(path=self.manager.output_folder, create_subdir=True, is_datetime_in_path=True)) @@ -404,9 +455,9 @@ def explain(self, explaing_config: dict = {}): name = explaing_config.get('name', 'test') method = explaing_config.get('method', 'point') - explainer = self.api_controller.explain_methods[method](model=self, - features=self.predict_data.features.squeeze(), - target=self.predict_data.target) + explainer = self.manager.explain_methods[method](model=self, + features=self.predict_data.features.squeeze(), + target=self.predict_data.target) explainer.explain(n_samples=samples, window=window, method=metric) explainer.visual(metric=metric, threshold=threshold, name=name) @@ -434,14 +485,17 @@ def vis_optimisation_history(self, opt_history_path: str = None, history_visualizer.diversity_population, dict( save_path='diversity_population.gif', fps=1))} - def plot_func(mode): return vis_func[mode][0](**vis_func[mode][1]) + def plot_func(mode): + return vis_func[mode][0](**vis_func[mode][1]) 
Either(value=vis_func, monoid=[mode, mode == 'all']).either( left_function=plot_func, - right_function=lambda vis_func: [func(**params) for func, params in vis_func.values()]) + right_function=lambda vis_func: [func(**params) for func, params in vis_func.values()] + ) return history_visualizer.history if return_history else None def shutdown(self): + """Shutdown Dask client""" self.dask_client.close() del self.dask_client diff --git a/fedot_ind/api/utils/api_init.py b/fedot_ind/api/utils/api_init.py index 4ad5845ff..e7537e926 100644 --- a/fedot_ind/api/utils/api_init.py +++ b/fedot_ind/api/utils/api_init.py @@ -2,16 +2,14 @@ from pathlib import Path from fedot.core.repository.tasks import TsForecastingParams -from golem.core.optimisers.adaptive.operator_agent import RandomAgent from pymonad.either import Either from fedot_ind.api.utils.industrial_strategy import IndustrialStrategy from fedot_ind.api.utils.path_lib import DEFAULT_PATH_RESULTS as default_path_to_save_results from fedot_ind.core.architecture.preprocessing.data_convertor import ApiConverter -from fedot_ind.core.architecture.settings.computational import BackendMethods from fedot_ind.core.optimizer.IndustrialEvoOptimizer import IndustrialEvoOptimizer from fedot_ind.core.repository.constanst_repository import \ - FEDOT_API_PARAMS, fedot_init_assumptions, FEDOT_MUTATION_STRATEGY + FEDOT_API_PARAMS, fedot_init_assumptions from fedot_ind.core.repository.model_repository import default_industrial_availiable_operation from fedot_ind.tools.explain.explain import PointExplainer, RecurrenceExplainer @@ -34,12 +32,13 @@ def null_state_object(self): def user_config_object(self, kwargs): self.output_folder = kwargs.get('output_folder', None) - self.industrial_strategy_params = kwargs.get( - 'industrial_strategy_params', {}) - self.industrial_strategy = kwargs.get('industrial_strategy', None) + self.strategy_params = kwargs.get( + 'strategy_params', None) + self.strategy_class = kwargs.get('strategy', None) 
self.path_to_composition_results = kwargs.get('history_dir', None) self.backend_method = kwargs.get('backend', 'cpu') self.task_params = kwargs.get('task_params', {}) + self.optimizer_params = kwargs.get('optimizer_params', None) def path_object(self, kwargs): # create dirs with results @@ -72,28 +71,28 @@ def path_object(self, kwargs): def industrial_config_object(self, kwargs): # map Fedot params to Industrial params - self.config_dict = kwargs - # self.config_dict['history_dir'] = prefix - self.preset = kwargs.get('preset', self.config_dict['problem']) - self.config_dict['available_operations'] = kwargs.get('available_operations', - default_industrial_availiable_operation(self.preset)) + self.config = kwargs + # self.config['history_dir'] = prefix + self.preset = kwargs.get('preset', self.config['problem']) + self.config['available_operations'] = kwargs.get('available_operations', + default_industrial_availiable_operation(self.preset)) self.is_default_fedot_context = self.preset.__contains__('tabular') - self.is_regression_task_context = self.config_dict['problem'] in ['ts_forecasting', 'regression'] - self.config_dict['cv_folds'] = kwargs.get('cv_folds', 3) - self.config_dict['optimizer'] = kwargs.get('optimizer', IndustrialEvoOptimizer) - self.config_dict['initial_assumption'] = kwargs.get('initial_assumption', None) - if self.config_dict['initial_assumption'] is None: - self.config_dict['initial_assumption'] = Either(value=self.industrial_strategy, - monoid=[self.preset, - self.industrial_strategy == 'anomaly_detection']). 
\ + self.is_regression_task_context = self.config['problem'] in ['ts_forecasting', 'regression'] + self.config['cv_folds'] = kwargs.get('cv_folds', 3) + self.config['optimizer'] = kwargs.get('optimizer', IndustrialEvoOptimizer) + self.config['initial_assumption'] = kwargs.get('initial_assumption', None) + if self.config['initial_assumption'] is None: + self.config['initial_assumption'] = Either(value=self.strategy_class, + monoid=[self.preset, + self.strategy_class == 'anomaly_detection']). \ either(left_function=fedot_init_assumptions, right_function=fedot_init_assumptions) - self.config_dict['use_input_preprocessing'] = kwargs.get( + self.config['use_input_preprocessing'] = kwargs.get( 'use_input_preprocessing', False) - if self.task_params is not None and self.config_dict['problem'] == 'ts_forecasting': - self.config_dict['task_params'] = TsForecastingParams( + if self.task_params is not None and self.config['problem'] == 'ts_forecasting': + self.config['task_params'] = TsForecastingParams( forecast_length=self.task_params['forecast_length']) self.__init_experiment_setup() @@ -108,36 +107,21 @@ def industrial_api_object(self): # create API subclasses for side task self.condition_check = ApiConverter() self.industrial_strategy_class = IndustrialStrategy( - api_config=self.config_dict, - industrial_strategy=self.industrial_strategy, - industrial_strategy_params=self.industrial_strategy_params, + api_config=self.config, + industrial_strategy=self.strategy_class, + industrial_strategy_params=self.strategy_params, logger=self.logger) - self.industrial_strategy = self.industrial_strategy if self.industrial_strategy != 'anomaly_detection' else None + self.industrial_strategy = self.strategy_class if self.strategy_class != 'anomaly_detection' else None def __init_experiment_setup(self): self.logger.info('Initialising experiment setup') - industrial_params = set(self.config_dict.keys()) - \ + industrial_params = set(self.config.keys()) - \ set(FEDOT_API_PARAMS.keys()) 
for param in industrial_params: - self.config_dict.pop(param, None) - - backend_method_current, backend_scipy_current = BackendMethods( - self.backend_method).backend - globals()['backend_methods'] = backend_method_current - globals()['backend_scipy'] = backend_scipy_current - - def _check_mutations(self, solver): - for mutation in solver.api_composer.params.optimizer_params.mutation_types.mutation_types: - try: - is_invalid = mutation.__name__.__contains__('resample') - except Exception: - is_invalid = mutation.name.__contains__('resample') - if is_invalid: - solver.api_composer.params.optimizer_params.mutation_types.mutation_types.remove(mutation) - - solver.api_composer.params.optimizer_params.adaptive_mutation_type = RandomAgent( - actions=solver.api_composer.params.optimizer_params.mutation_types, - probs=FEDOT_MUTATION_STRATEGY[ - 'params_mutation_strategy']) - return solver + self.config.pop(param, None) + + # backend_method_current, backend_scipy_current = BackendMethods( + # self.backend_method).backend + # globals()['backend_methods'] = backend_method_current + # globals()['backend_scipy'] = backend_scipy_current diff --git a/fedot_ind/api/utils/checkers_collections.py b/fedot_ind/api/utils/checkers_collections.py index 1b42f199d..0a2571ed2 100644 --- a/fedot_ind/api/utils/checkers_collections.py +++ b/fedot_ind/api/utils/checkers_collections.py @@ -11,7 +11,10 @@ from fedot_ind.api.utils.data import check_multivariate_data from fedot_ind.core.architecture.preprocessing.data_convertor import NumpyConverter, DataConverter from fedot_ind.core.architecture.settings.computational import backend_methods as np +from fedot_ind.core.operation.decomposition.matrix_decomposition.column_sampling_decomposition import CURDecomposition +from fedot_ind.core.operation.transformation.representation.tabular.tabular_extractor import TabularExtractor from fedot_ind.core.repository.constanst_repository import FEDOT_DATA_TYPE, fedot_task +from 
fedot_ind.core.repository.initializer_industrial_models import IndustrialModels class DataCheck: @@ -36,12 +39,16 @@ def __init__(self, fit_stage=False, industrial_task_params=None): self.logger = logging.getLogger(self.__class__.__name__) - self.industrial_task_params = industrial_task_params or {} - - if len(self.industrial_task_params) != 0: - self.data_type = FEDOT_DATA_TYPE[self.industrial_task_params['data_type']] - else: - self.data_type = FEDOT_DATA_TYPE['tensor'] + self.manager = None + self.strategy_params = industrial_task_params + self.convert_ts_method = {'ts2tabular': self._convert_ts2tabular, + 'ts2image': self._convert_ts2image, + 'big_dataset': self._convert_big_data} + if hasattr(industrial_task_params, 'strategy_params'): + self.strategy_params = industrial_task_params.strategy_params + self.manager = industrial_task_params + self.data_type = FEDOT_DATA_TYPE[self.strategy_params['data_type']] \ + if self.strategy_params is not None else FEDOT_DATA_TYPE['tensor'] self.input_data = input_data self.data_convertor = DataConverter(data=self.input_data) @@ -60,22 +67,10 @@ def __check_features_and_target(self, input_data, data_type): else: X, y = input_data.features, input_data.target - multi_features, X = check_multivariate_data(X) + multi_features, features = check_multivariate_data(X) multi_target = len(y.shape) > 1 and y.shape[1] > 2 - - if multi_features: - features = np.array(X.tolist()).astype(float) - else: - features = X - - if isinstance(y, (pd.DataFrame, pd.Series)): - y = y.values - if multi_target: - target = y - elif multi_features and not multi_target: - target = y.reshape(-1, 1) - else: - target = np.ravel(y).reshape(-1, 1) + target = y.values if isinstance(y, (pd.DataFrame, pd.Series)) else y + target = target.reshape(-1, 1) if multi_features and not multi_target else np.ravel(target).reshape(-1, 1) return features, multi_features, target @@ -124,14 +119,14 @@ def _transformation_for_other_task(self, data_list): len( data_list[0]))) - 
have_predict_horizon = Either(value=False, monoid=[True, len(self.industrial_task_params) == 0]).either( - left_function=lambda l: self.industrial_task_params['data_type'] == 'time_series' and - 'detection_window' in self.industrial_task_params.keys(), + have_predict_horizon = Either(value=False, monoid=[True, self.strategy_params is None]).either( + left_function=lambda l: self.strategy_params['data_type'] == 'time_series' and + 'detection_window' in self.strategy_params.keys(), right_function=lambda r: r) task = Either( value=fedot_task(self.task), monoid=['ts_forecasting', not have_predict_horizon]).either( - left_function=lambda l: fedot_task(l, self.industrial_task_params['detection_window']), + left_function=lambda l: fedot_task(l, self.strategy_params['detection_window']), right_function=lambda r: r) return InputData(idx=idx, features=input_data[0], @@ -202,6 +197,44 @@ def _check_input_data_target(self): elif self.task == 'classification': self.input_data.target[self.input_data.target == -1] = 0 + def _check_fedot_context(self): + if self.manager is not None: + IndustrialModels().setup_repository() + learning_strategy = self.strategy_params['learning_strategy'] if \ + 'learning_strategy' in self.strategy_params.keys() else None + default_fedot_context = self.manager.is_default_fedot_context \ + and learning_strategy is not None + sampling_strategy = self.strategy_params['sampling_strategy'] \ + if 'sampling_strategy' in self.strategy_params.keys() else None + self.input_data.features = Either(value=learning_strategy, + monoid=[self.input_data, default_fedot_context]).either( + left_function=lambda x: x.features, + right_function=lambda strategy: self.convert_ts_method[strategy] + (self.input_data, sampling_strategy).predict) + + def _convert_ts2tabular(self, input_data, sampling_strategy): + if sampling_strategy is not None: + sample_start, sample_end = list(sampling_strategy['samples'].values()) + channel_start, channel_end = 
list(sampling_strategy['channels'].values()) + element_start, element_end = list(sampling_strategy['elements'].values()) + input_data.features = self.input_data.features[ + sample_start:sample_end, + channel_start:channel_end, + element_start:element_end] + fg_list = self.manager.strategy_params['feature_generator'] + ts2tabular_model = TabularExtractor({'feature_domain': fg_list, + 'reduce_dimension': False}) + return ts2tabular_model.transform(input_data) + + def _convert_ts2image(self): + pass + + def _convert_big_data(self, input_data, sampling_strategy: dict): + approx_method_dict = {'CUR': CURDecomposition} + approx_method, method_params = list(sampling_strategy.items())[0] + big_dataset_model = approx_method_dict[approx_method](method_params) + return big_dataset_model.transform(input_data) + def check_available_operations(self, available_operations): pass @@ -210,7 +243,9 @@ def _process_input_data(self): if not self.data_convertor.is_torchvision_dataset: self._check_input_data_features() self._check_input_data_target() + self._check_fedot_context() self.input_data.supplementary_data.is_auto_preprocessed = True + return self.input_data def check_input_data(self) -> InputData: diff --git a/fedot_ind/api/utils/industrial_strategy.py b/fedot_ind/api/utils/industrial_strategy.py index 0ef6d01d4..7a584f18d 100644 --- a/fedot_ind/api/utils/industrial_strategy.py +++ b/fedot_ind/api/utils/industrial_strategy.py @@ -22,6 +22,18 @@ class IndustrialStrategy: + """ + Class for industrial strategy implementation + + Args: + industrial_strategy_params: dict + Parameters for industrial strategy + industrial_strategy: str + Industrial strategy name + api_config: dict + Configuration for API + """ + def __init__(self, industrial_strategy_params, industrial_strategy, @@ -60,7 +72,7 @@ def __init__(self, self.ensemble_strategy = list(self.ensemble_strategy_dict.keys()) self.random_label = None - self.config_dict = api_config + self.config = api_config self.logger = 
logging.getLogger('IndustrialStrategy') self.kernel_ensembler = KernelEnsembler self.RAF_workers = None @@ -98,12 +110,12 @@ def _federated_strategy(self, input_data): batch_size = round(input_data.features.shape[0] / self.RAF_workers) min_timeout = 0.5 - selected_timeout = round(self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION) - self.config_dict['timeout'] = max(min_timeout, selected_timeout) + selected_timeout = round(self.config['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION) + self.config['timeout'] = max(min_timeout, selected_timeout) self.logger.info(f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}') - self.solver = RAFEnsembler(composing_params=self.config_dict, + self.solver = RAFEnsembler(composing_params=self.config, n_splits=self.RAF_workers, batch_size=batch_size) self.logger.info( @@ -114,7 +126,7 @@ def _federated_strategy(self, input_data): else: self.logger.info(f'RAF algorithm is not applicable: n_samples={n_samples} < {BATCH_SIZE_FOR_FEDOT_WORKER}. 
' f'FEDOT algorithm was applied') - self.solver = Fedot(**self.config_dict) + self.solver = Fedot(**self.config) self.solver.fit(input_data) def _forecasting_strategy(self, input_data): @@ -125,8 +137,8 @@ def _forecasting_strategy(self, input_data): {}).fit(input_data) for model_name, model_impl in FEDOT_TS_FORECASTING_ASSUMPTIONS.items()} self.solver = self._finetune_loop(kernel_model, kernel_data, self.finetune_params) # for model_name, init_assumption in FEDOT_TS_FORECASTING_ASSUMPTIONS.items(): - # self.config_dict['initial_assumption'] = init_assumption.build() - # industrial = Fedot(**self.config_dict) + # self.config['initial_assumption'] = init_assumption.build() + # industrial = Fedot(**self.config) # Maybe( # value=industrial.fit(input_data), # monoid=True).maybe( @@ -144,7 +156,7 @@ def _sampling_strategy(self, input_data): target=input_data.target, sampling_rate=sampling_rate) input_data.idx = np.arange(len(input_data.features)) - industrial = Fedot(**self.config_dict) + industrial = Fedot(**self.config) Maybe( value=industrial.fit(input_data), monoid=True).maybe( @@ -157,7 +169,7 @@ def _forecasting_exogenous_strategy(self, input_data): self.logger.info('TS exogenous forecasting algorithm was applied') self.solver = {} init_assumption = PipelineBuilder().add_node('lagged', 0) - task = FEDOT_TASK[self.config_dict['problem']] + task = FEDOT_TASK[self.config['problem']] train_lagged, predict_lagged = train_test_data_setup(InputData(idx=np.arange(len(input_data.features)), features=input_data.features, target=input_data.features, @@ -173,13 +185,13 @@ def _forecasting_exogenous_strategy(self, input_data): target=input_data.features, task=task, data_type=DataTypesEnum.ts), 2) - dataset_dict.update({f'exog_ts': train_exog}) + dataset_dict.update({'exog_ts': train_exog}) train_dataset = MultiModalData(dataset_dict) init_assumption = init_assumption.join_branches('ridge') - self.config_dict['initial_assumption'] = init_assumption.build() + 
self.config['initial_assumption'] = init_assumption.build() - industrial = Fedot(**self.config_dict) + industrial = Fedot(**self.config) industrial.fit(train_dataset) self.solver = {'exog_model': industrial} @@ -188,7 +200,7 @@ def _finetune_loop(self, kernel_data: dict, tuning_params: dict = {}): tuned_models = {} - tuning_params['metric'] = FEDOT_TUNING_METRICS[self.config_dict['problem']] + tuning_params['metric'] = FEDOT_TUNING_METRICS[self.config['problem']] for generator, kernel_model in kernel_ensemble.items(): tuning_params['tuner'] = FEDOT_TUNER_STRATEGY['simultaneous'] model_to_tune = deepcopy(kernel_model) @@ -198,10 +210,8 @@ def _finetune_loop(self, return tuned_models def _kernel_strategy(self, input_data): - self.kernel_ensembler = KernelEnsembler( - self.industrial_strategy_params) - kernel_ensemble, kernel_data = self.kernel_ensembler.transform( - input_data).predict + self.kernel_ensembler = KernelEnsembler(self.industrial_strategy_params) + kernel_ensemble, kernel_data = self.kernel_ensembler.transform(input_data).predict self.solver = self._finetune_loop(kernel_ensemble, kernel_data) def _lora_strategy(self, input_data): diff --git a/fedot_ind/api/utils/recurrent_image.py b/fedot_ind/api/utils/recurrent_image.py index 86870163c..e986448a7 100644 --- a/fedot_ind/api/utils/recurrent_image.py +++ b/fedot_ind/api/utils/recurrent_image.py @@ -1,11 +1,11 @@ import os import numpy as np -from matplotlib import pyplot as plt from PIL import Image +from matplotlib import pyplot as plt from fedot_ind.api.utils.data import init_input_data -from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor +from fedot_ind.core.operation.transformation.representation.recurrence.reccurence_extractor import RecurrenceExtractor from fedot_ind.tools.loader import DataLoader diff --git a/fedot_ind/core/architecture/abstraction/decorators.py b/fedot_ind/core/architecture/abstraction/decorators.py index 1e854be56..c2008981d 100644 --- 
a/fedot_ind/core/architecture/abstraction/decorators.py +++ b/fedot_ind/core/architecture/abstraction/decorators.py @@ -113,9 +113,9 @@ class DaskServer(metaclass=Singleton): def __init__(self): print('Creating Dask Server') cluster = LocalCluster(processes=False, - # n_workers=4, - # threads_per_worker=4, - # memory_limit='3GB' + n_workers=4, + threads_per_worker=4, + memory_limit='auto' ) # connect client to your cluster self.client = Client(cluster) diff --git a/fedot_ind/core/architecture/pipelines/abstract_pipeline.py b/fedot_ind/core/architecture/pipelines/abstract_pipeline.py index f3340fce4..579a5b214 100644 --- a/fedot_ind/core/architecture/pipelines/abstract_pipeline.py +++ b/fedot_ind/core/architecture/pipelines/abstract_pipeline.py @@ -45,9 +45,14 @@ def create_pipeline(node_list, build: bool = True): for branch, nodes in node_list.items(): if isinstance(branch, int): for node in nodes: - pipeline.add_node(node, branch_idx=branch) + if isinstance(node, tuple): + pipeline.add_node(operation_type=node[0], params=node[1], branch_idx=branch) + else: + pipeline.add_node(operation_type=node, branch_idx=branch) else: pipeline.join_branches(nodes) + elif isinstance(node_list, PipelineBuilder): + return pipeline else: for node in node_list: pipeline.add_node(node) diff --git a/fedot_ind/core/architecture/preprocessing/data_convertor.py b/fedot_ind/core/architecture/preprocessing/data_convertor.py index e1c836981..6e891bdd3 100644 --- a/fedot_ind/core/architecture/preprocessing/data_convertor.py +++ b/fedot_ind/core/architecture/preprocessing/data_convertor.py @@ -405,7 +405,10 @@ def have_fit_method(self): @property def have_predict_method(self): - return dir(self.operation_example).__contains__('predict') + if hasattr(self.operation_example, 'predict'): + return True if callable(self.operation_example.predict) else False + else: + return False @property def have_predict_for_fit_method(self): diff --git a/fedot_ind/core/architecture/settings/pipeline_factory.py 
b/fedot_ind/core/architecture/settings/pipeline_factory.py index e11f305f1..e6190a05a 100644 --- a/fedot_ind/core/architecture/settings/pipeline_factory.py +++ b/fedot_ind/core/architecture/settings/pipeline_factory.py @@ -3,12 +3,12 @@ from fedot_ind.core.models.detection.probalistic.kalman import UnscentedKalmanFilter from fedot_ind.core.models.detection.subspaces.func_pca import FunctionalPCA from fedot_ind.core.models.detection.subspaces.sst import SingularSpectrumTransformation -from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor -from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor -from fedot_ind.core.models.topological.topological_extractor import TopologicalExtractor from fedot_ind.core.operation.transformation.basis.eigen_basis import EigenBasisImplementation from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation +from fedot_ind.core.operation.transformation.representation.recurrence.reccurence_extractor import RecurrenceExtractor +from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor +from fedot_ind.core.operation.transformation.representation.topological.topological_extractor import TopologicalExtractor class BasisTransformations(Enum): @@ -30,31 +30,31 @@ class MlModel(Enum): class KernelFeatureGenerator(Enum): - quantile = [{'feature_generator_type': 'quantile', + quantile = [{'feature_generator_type': 'statistical', 'feature_hyperparams': { 'window_mode': True, 'window_size': 5 } }, - {'feature_generator_type': 'quantile', + {'feature_generator_type': 'statistical', 'feature_hyperparams': { 'window_mode': True, 'window_size': 10 } }, - {'feature_generator_type': 'quantile', + {'feature_generator_type': 'statistical', 'feature_hyperparams': { 'window_mode': True, 'window_size': 20 } }, - 
{'feature_generator_type': 'quantile', + {'feature_generator_type': 'statistical', 'feature_hyperparams': { 'window_mode': True, 'window_size': 30 } }, - {'feature_generator_type': 'quantile', + {'feature_generator_type': 'statistical', 'feature_hyperparams': { 'window_mode': True, 'window_size': 40 diff --git a/fedot_ind/core/ensemble/kernel_ensemble.py b/fedot_ind/core/ensemble/kernel_ensemble.py index b5aff8db3..e53592461 100644 --- a/fedot_ind/core/ensemble/kernel_ensemble.py +++ b/fedot_ind/core/ensemble/kernel_ensemble.py @@ -1,22 +1,44 @@ from copy import deepcopy -from typing import Optional, Any +from typing import Any, Optional import pandas as pd -from MKLpy.callbacks import EarlyStopping -from MKLpy.scheduler import ReduceOnWorsening from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from MKLpy.callbacks import EarlyStopping +from MKLpy.scheduler import ReduceOnWorsening from scipy.spatial.distance import pdist, squareform from sklearn.svm import SVC from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.base_extractor import BaseExtractor -from fedot_ind.core.repository.constanst_repository import KERNEL_ALGO, KERNEL_BASELINE_FEATURE_GENERATORS, \ - KERNEL_BASELINE_NODE_LIST, KERNEL_DISTANCE_METRIC, get_default_industrial_model_params +from fedot_ind.core.repository.constanst_repository import ( + KERNEL_ALGO, + KERNEL_BASELINE_FEATURE_GENERATORS, + KERNEL_BASELINE_NODE_LIST, + KERNEL_DISTANCE_METRIC, + get_default_industrial_model_params, +) class KernelEnsembler(BaseExtractor): + """ + Class for kernel ensembling. This class implements a kernel-based ensemble method for feature + extraction and classification. It supports both one-stage and two-stage kernel learning + strategies and can handle multiclass classification problems. 
+ + Args: + params (Optional[OperationParameters]): Parameters of the operation + + Attributes: + distance_metric (str): The distance metric used to calculate the Gram matrix + kernel_strategy (str): The kernel learning strategy used by the model + learning_strategy (str): The learning strategy used by the model + head_model (str): The head model used by the model + feature_extractor (List[str]): The feature extractors used by the model + + """ + def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.distance_metric = params.get('distance_metric', KERNEL_DISTANCE_METRIC['default_metric']) @@ -26,16 +48,14 @@ def __init__(self, params: Optional[OperationParameters] = None): self.feature_extractor = params.get('feature_extractor', list( KERNEL_BASELINE_FEATURE_GENERATORS.keys())) - self._mapping_dict = {k: v for k, - v in enumerate(self.feature_extractor)} + self._mapping_dict = {k: v for k, v in enumerate(self.feature_extractor)} self.lr = params.get('learning_rate', 0.1) self.patience = params.get('patience', 5) self.epoch = params.get('epoch', 500) self.optimisation_metric = params.get('optimisation_metric', 'roc_auc') self.algo_impl_dict = {'one_step': self.__one_stage_kernel, - 'two_step': self.__two_stage_kernel - } + 'two_step': self.__two_stage_kernel} self.feature_matrix_train = [] self.feature_matrix_test = [] @@ -129,16 +149,21 @@ def _transform(self, input_data: InputData) -> np.array: """ self.__multiclass_check(input_data.target) grammian_list = self.generate_grammian(input_data) + if self.kernel_strategy.__contains__('one'): - kernel_weight_matrix = self.__one_stage_kernel( - grammian_list, input_data.target) + kernel_weight_matrix = self.__one_stage_kernel(grammian_list, input_data.target) + else: - kernel_weight_matrix = self.__two_stage_kernel( - grammian_list, input_data.target) - top_n_generators, classes_described_by_generator = self._select_top_feature_generators( - kernel_weight_matrix) + 
kernel_weight_matrix = self.__two_stage_kernel(grammian_list, input_data.target) + + top_n_generators, classes_described_by_generator = self._select_top_feature_generators(kernel_weight_matrix) + self.predict = self._create_kernel_ensemble( - input_data, top_n_generators, classes_described_by_generator) + input_data, + top_n_generators, + classes_described_by_generator + ) + return self.predict def generate_grammian(self, input_data) -> list[Any]: @@ -148,8 +173,8 @@ def generate_grammian(self, input_data) -> list[Any]: self.feature_matrix_train = [ x.reshape( x.shape[0], - x.shape[1] * - x.shape[2]) for x in self.feature_matrix_train] + x.shape[1] * x.shape[2] + ) for x in self.feature_matrix_train] KLtr = [squareform(pdist(X=feature, metric=self.distance_metric)) for feature in self.feature_matrix_train] return KLtr diff --git a/fedot_ind/core/metrics/evaluation.py b/fedot_ind/core/metrics/evaluation.py index 1b723e142..c29a073bf 100644 --- a/fedot_ind/core/metrics/evaluation.py +++ b/fedot_ind/core/metrics/evaluation.py @@ -1,9 +1,21 @@ import logging from enum import Enum -from typing import Dict, List - - -from fedot_ind.core.metrics.metrics_implementation import * +from typing import Dict, List, Union + +import numpy as np + +from fedot_ind.core.metrics.metrics_implementation import ( + F1, + MAE, + MAPE, + MSE, + R2, + RMSE, + ROCAUC, + Accuracy, + Logloss, + Precision, +) class Metrics(Enum): diff --git a/fedot_ind/core/metrics/metrics_implementation.py b/fedot_ind/core/metrics/metrics_implementation.py index fea9c2877..71d77935e 100644 --- a/fedot_ind/core/metrics/metrics_implementation.py +++ b/fedot_ind/core/metrics/metrics_implementation.py @@ -1,23 +1,37 @@ -from typing import Optional -from typing import Union +from typing import Optional, Union import numpy as np import pandas as pd from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters from golem.core.dag.graph import Graph -from 
sklearn.metrics import (accuracy_score, f1_score, - log_loss, mean_absolute_error, - mean_absolute_percentage_error, - mean_squared_error, mean_squared_log_error, - precision_score, r2_score, roc_auc_score) -from sklearn.metrics import d2_absolute_error_score, explained_variance_score, max_error, median_absolute_error +from sklearn.metrics import ( + accuracy_score, + d2_absolute_error_score, + explained_variance_score, + f1_score, + log_loss, + max_error, + mean_absolute_error, + mean_absolute_percentage_error, + mean_squared_error, + mean_squared_log_error, + median_absolute_error, + precision_score, + r2_score, + roc_auc_score, +) from sktime.performance_metrics.forecasting import mean_absolute_scaled_error from fedot_ind.core.architecture.settings.computational import backend_methods as np + # from fedot_ind.core.architecture.preprocessing.data_convertor import DataConverter -from fedot_ind.core.metrics.anomaly_detection.function import single_average_delay, \ - single_evaluate_nab, single_detecting_boundaries, check_errors +from fedot_ind.core.metrics.anomaly_detection.function import ( + check_errors, + single_average_delay, + single_detecting_boundaries, + single_evaluate_nab, +) class ParetoMetrics: @@ -83,7 +97,7 @@ def metric(self) -> float: return mean_squared_error( y_true=self.target, y_pred=self.predicted_labels, - squared=False) + squared=False) ** 0.5 class SMAPE(QualityMetric): @@ -228,8 +242,13 @@ def mape(A, F): def calculate_regression_metric(target, labels, rounding_order=3, - metric_names=('r2', 'rmse', 'mae'), + metric_names=None, **kwargs): + + # Set default metrics + if metric_names is None: + metric_names = ('r2', 'rmse', 'mae') + target = target.astype(float) def rmse(y_true, y_pred): @@ -256,11 +275,14 @@ def rmse(y_true, y_pred): def calculate_forecasting_metric(target, labels, rounding_order=3, - metric_names=('smape', 'rmse', - 'mape'), + metric_names=None, **kwargs): target = target.astype(float) + # Set default metrics + if 
metric_names is None: + metric_names = ('smape', 'rmse', 'mape') + def rmse(y_true, y_pred): return np.sqrt(mean_squared_error(y_true, y_pred)) @@ -285,18 +307,20 @@ def calculate_classification_metric( labels, probs, rounding_order=3, - metric_names=( - 'f1', - # 'roc_auc', - 'accuracy')): + metric_names=('f1', 'accuracy')): + + # Set default metrics + if metric_names is None: + metric_names = ('f1', 'accuracy') + metric_dict = {'accuracy': Accuracy, 'f1': F1, # 'roc_auc': ROCAUC, 'precision': Precision, 'logloss': Logloss} - df = pd.DataFrame({name: func(target, labels, probs).metric( - ) for name, func in metric_dict.items() if name in metric_names}, index=[0]) + df = pd.DataFrame({name: func(target, labels, probs).metric() + for name, func in metric_dict.items() if name in metric_names}, index=[0]) return df.round(rounding_order) diff --git a/fedot_ind/core/models/base_extractor.py b/fedot_ind/core/models/base_extractor.py index a273c6675..013852294 100644 --- a/fedot_ind/core/models/base_extractor.py +++ b/fedot_ind/core/models/base_extractor.py @@ -1,17 +1,17 @@ import logging import math -from itertools import chain from multiprocessing import cpu_count +import dask from fedot.core.data.data import InputData from fedot.core.repository.dataset_types import DataTypesEnum -from joblib import delayed, Parallel from numpy.lib import stride_tricks as stride_repr +from tqdm.dask import TqdmCallback from fedot_ind.api.utils.data import init_input_data -from fedot_ind.core.architecture.abstraction.decorators import convert_to_input_data from fedot_ind.core.metrics.metrics_implementation import * from fedot_ind.core.operation.IndustrialCachableOperation import IndustrialCachableOperationImplementation +from fedot_ind.core.operation.filtration.feature_filtration import FeatureSpaceReducer from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix from fedot_ind.core.repository.constanst_repository import STAT_METHODS, STAT_METHODS_GLOBAL @@ -23,49 
+23,62 @@ class BaseExtractor(IndustrialCachableOperationImplementation): def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) - self.current_window = None - self.stride = 3 - self.n_processes = math.ceil(cpu_count() * 0.7) if cpu_count() > 1 else 1 - self.data_type = DataTypesEnum.table self.use_cache = self.params.get('use_cache', False) self.use_sliding_window = self.params.get('use_sliding_window', True) + self.use_feature_filter = self.params.get('use_feature_filter', False) + self.feature_filter = FeatureSpaceReducer() + self.data_type = DataTypesEnum.table + + self.current_window = None self.relevant_features = None + self.predict = None + + self.stride = 3 + self.n_processes = math.ceil(cpu_count() * 0.7) if cpu_count() > 1 else 1 + self.logger = logging.getLogger(self.__class__.__name__) self.logging_params = {'jobs': self.n_processes} - self.predict = None + + def __repr__(self): + return 'Abstract Class for TS representation' def fit(self, input_data: InputData): pass def extract_features(self, x, y) -> pd.DataFrame: """ - For those cases when you need to use feature extractor as a stangalone object + For those cases when you need to use feature extractor as a standalone object """ input_data = init_input_data(x, y) transformed_features = self.transform(input_data, use_cache=self.use_cache) try: - return pd.DataFrame(transformed_features.predict, columns=self.relevant_features) + return pd.DataFrame(transformed_features.predict.squeeze(), columns=self.relevant_features) except ValueError: - return pd.DataFrame(transformed_features.predict) + return pd.DataFrame(transformed_features.predict.squeeze()) def _transform(self, input_data: InputData) -> np.array: """ Method for feature generation for all series """ - parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs") - feature_matrix = parallel( - delayed(self.generate_features_from_ts)(sample) for sample in input_data.features - ) - if 
len(feature_matrix[0].features.shape) > 1: - stacked_data = np.stack([ts.features for ts in feature_matrix]) + evaluation_results = list(map(lambda sample: self.generate_features_from_ts(sample), input_data.features)) + with TqdmCallback(desc=fr"compute_feature_extraction_with_{self.__repr__()}"): + feature_matrix = dask.compute(*evaluation_results) + if len(feature_matrix[0].shape) > 1: + stacked_data = np.stack(feature_matrix) self.predict = self._clean_predict(stacked_data) else: - stacked_data = np.array([ts.features for ts in feature_matrix]) + stacked_data = np.array(feature_matrix) self.predict = self._clean_predict(stacked_data) self.predict = self.predict.reshape(self.predict.shape[0], -1) + # self.relevant_features = feature_matrix[0].supplementary_data['feature_name'] + + if self.use_feature_filter: + if not self.feature_filter.is_fitted: + self.predict = self.feature_filter.reduce_feature_space(self.predict) + else: + self.predict = self.predict[:, :, self.feature_filter.feature_mask] - self.relevant_features = feature_matrix[0].supplementary_data['feature_name'] return self.predict def _clean_predict(self, predict: np.array): @@ -81,10 +94,9 @@ def generate_features_from_ts(self, ts_frame: np.array, window_length: int = Non Method responsible for generation of features from time series. """ - @convert_to_input_data def get_statistical_features(self, time_series: np.ndarray, add_global_features: bool = False) -> tuple: """ - Method for creating baseline quantile features for a given time series. + Method for creating baseline statistical features for a given time series. 
Args: add_global_features: if True, global features are added to the feature set @@ -94,20 +106,13 @@ def get_statistical_features(self, time_series: np.ndarray, add_global_features: InputData: object with features """ - names = [] - features = [] time_series = time_series.flatten() list_of_methods = [*STAT_METHODS_GLOBAL.items()] if add_global_features else [*STAT_METHODS.items()] + return list(map(lambda method: method[1](time_series), list_of_methods)) - for method in list_of_methods: - features.append(method[1](time_series)) - names.append(method[0]) - return features, names - - @convert_to_input_data def apply_window_for_stat_feature(self, ts_data: np.array, feature_generator: callable, - window_size: int = None) -> tuple: + window_size: int = None) -> np.ndarray: window_size = round(ts_data.shape[0] / 10) if window_size is None \ else round(ts_data.shape[0] * (window_size / 100)) @@ -124,30 +129,13 @@ def apply_window_for_stat_feature(self, ts_data: np.array, if subseq_set is None: ts_slices = list(range(0, ts_data.shape[0], window_size)) features = list(map(lambda slice: feature_generator(ts_data[slice:slice + window_size]), ts_slices)) - names = list(map(lambda ts_tup: [x + f'_on_interval: {ts_tup[1] + 1} - {ts_tup[1] + 1 + window_size}' - for x in ts_tup[0].supplementary_data['feature_name']], - zip(features, ts_slices))) - features = [x.features for x in features] - else: ts_slices = list(range(0, subseq_set.shape[1])) features = list(map(lambda slice: feature_generator(subseq_set[:, slice]), ts_slices)) - names = list(map(lambda ts_tup: [x + f'_on_interval: {ts_tup[1] + 1} - {ts_tup[1] + 1 + window_size}' - for x in ts_tup[0].supplementary_data['feature_name']], - zip(features, ts_slices))) - features = [x.features for x in features] - - return features, names - - @convert_to_input_data - def _get_feature_matrix(self, extraction_func: callable, ts: np.array) -> tuple: - multi_ts_stat_features = [extraction_func(x) for x in ts] - for component in 
multi_ts_stat_features: - if not isinstance(component.features, np.ndarray): - component.features = np.array(component.features) - features = np.concatenate([component.features.reshape(1, -1) for component in multi_ts_stat_features], axis=0) - - for index, component in enumerate(multi_ts_stat_features): - component.supplementary_data['feature_name'] = [f'component {index}'] - names = list(chain(*[x.supplementary_data['feature_name'] for x in multi_ts_stat_features])) - return features, names + return features + + def _get_feature_matrix(self, extraction_func: callable, ts: np.array) -> np.ndarray: + multi_channel_features = [extraction_func(x) for x in ts] + features = np.concatenate([channel_feature.reshape(1, -1) + for channel_feature in multi_channel_features], axis=0) + return features diff --git a/fedot_ind/core/models/nn/network_impl/base_nn_model.py b/fedot_ind/core/models/nn/network_impl/base_nn_model.py index de83a2f06..fbd02dcf4 100644 --- a/fedot_ind/core/models/nn/network_impl/base_nn_model.py +++ b/fedot_ind/core/models/nn/network_impl/base_nn_model.py @@ -207,12 +207,12 @@ def _predict_model(self, x_test, output_mode: str = 'default'): def _convert_predict(self, pred, output_mode: str = 'labels'): have_encoder = all([self.label_encoder is not None, output_mode == 'labels']) - output_is_clf_labels = all([not self.is_regression_task, output_mode == 'labels']) + output_is_clf_labels = output_mode == 'labels' and self.is_regression_task - pred = pred.cpu().detach().numpy() if self.is_regression_task else F.softmax(pred, dim=1) - y_pred = torch.argmax(pred, dim=1).cpu().detach().numpy() if output_is_clf_labels else pred + pred = pred if self.is_regression_task else F.softmax(pred, dim=1) + y_pred = torch.argmax(pred, dim=1) if output_is_clf_labels else pred y_pred = self.label_encoder.inverse_transform(y_pred) if have_encoder else y_pred - + y_pred = y_pred.cpu().detach().numpy() predict = OutputData( idx=np.arange(len(y_pred)), task=self.task_type, diff 
--git a/fedot_ind/core/models/pdl/__init__.py b/fedot_ind/core/models/pdl/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/models/pdl/pairwise_model.py b/fedot_ind/core/models/pdl/pairwise_model.py new file mode 100644 index 000000000..2ae7ace31 --- /dev/null +++ b/fedot_ind/core/models/pdl/pairwise_model.py @@ -0,0 +1,534 @@ +from copy import deepcopy +from typing import Optional, Union + +import numpy as np +import pandas as pd +import sklearn.base +from fedot.core.data.data import InputData +from fedot.core.operations.operation_parameters import OperationParameters +from pymonad.either import Either +from scipy.special import softmax + +from fedot_ind.core.repository.constanst_repository import SKLEARN_CLF_IMP, SKLEARN_REG_IMP + + +class PairwiseDifferenceEstimator: + """ + Base class for Pairwise Difference Learning. + """ + + def _convert_to_pandas(self, arr1, arr2): + if isinstance(arr1, np.ndarray) or isinstance(arr2, np.ndarray): + arr1, arr2 = pd.DataFrame(arr1), pd.DataFrame(arr2) + return arr1, arr2 + + def _to_pandas_regression(self, *args): + return (data if data is None or isinstance(data, (pd.DataFrame, pd.Series)) else pd.DataFrame(data) for data in + args) + + def _pair_data_regression(self, X1, X2, y1=None, y2=None): + X1, y1, X2, y2 = self._to_pandas_regression(X1, y1, X2, y2) + + X_pair = X1.merge(X2, how="cross") + x1_pair = X_pair[[f'{column}_x' for column in X1.columns]].rename( + columns={f'{column}_x': f'{column}_diff' for column in X1.columns}) + x2_pair = X_pair[[f'{column}_y' for column in X1.columns]].rename( + columns={f'{column}_y': f'{column}_diff' for column in X1.columns}) + X_pair = pd.concat([X_pair, x1_pair - x2_pair], axis='columns') + # Symmetric + x2_pair_sym = X_pair[[f'{column}_x' for column in X1.columns]].rename( + columns={f'{column}_x': f'{column}_y' for column in X1.columns}) + x1_pair_sym = X_pair[[f'{column}_y' for column in X1.columns]].rename( + columns={f'{column}_y': 
f'{column}_x' for column in X1.columns}) + X_pair_sym = pd.concat([x1_pair_sym, x2_pair_sym, x2_pair - x1_pair], axis='columns') + + if y1 is not None: + assert isinstance(y1, pd.Series) or y1.shape[1] == 1, f"Didn't expect more than one output {y1.shape}" + assert isinstance(y2, pd.Series) or y2.shape[1] == 1, f"Didn't expect more than one output {y2.shape}" + + y_pair = pd.DataFrame(y1).merge(y2, how="cross") + y_pair_diff = y_pair.iloc[:, 0] - y_pair.iloc[:, 1] + else: + y_pair_diff = None + + return X_pair, X_pair_sym, y_pair_diff + + @staticmethod + def _get_pair_feature_names(features: list) -> list: + """ Get the new name of features after pairing points. """ + return [f'{name}_x' for name in features] + [f'{name}_y' for name in features] + + def pair_input(self, X1: Union[np.ndarray, pd.Series], + X2: Union[np.ndarray, pd.Series]): + X1, X2 = self._convert_to_pandas(X1, X2) + X_pair = X1.merge(X2, how="cross") + x1_pair = X_pair[[f'{column}_x' for column in X1.columns]].rename(columns={f'{column}_x': f'{column}_diff' + for column in X1.columns}) + x2_pair = X_pair[[f'{column}_y' for column in X1.columns]].rename(columns={f'{column}_y': f'{column}_diff' + for column in X1.columns}) + try: + calculate_difference = x1_pair - x2_pair + except BaseException: + raise ValueError( + "PairwiseDifference: The input data is not compatible with the subtraction operation." + " Either transform all data to numeric features or use a ColumnTransformer to transform the data.") + # It means that the input data is not compatible with the subtraction operation. 
+ # Simply turn all your data into numbers + + X_pair = pd.concat([X_pair, calculate_difference], axis='columns') + # Symmetric + x2_pair_sym = X_pair[[f'{column}_x' for column in X1.columns]].rename(columns={f'{column}_x': f'{column}_y' + for column in X1.columns}) + x1_pair_sym = X_pair[[f'{column}_y' for column in X1.columns]].rename(columns={f'{column}_y': f'{column}_x' + for column in X1.columns}) + X_pair_sym = pd.concat([x1_pair_sym, x2_pair_sym, x2_pair - x1_pair], axis='columns') + # distances = cdist(X1, cluster_centers) + return X_pair, X_pair_sym + + def pair_output(self, + y1: Union[np.ndarray, pd.Series], + y2: Union[np.ndarray, pd.Series]) -> np.ndarray: + """For regression. Beware: unlike `_pair_data_regression`, the difference here is b-a, not a-b""" + + y1, y2 = self._convert_to_pandas(y1, y2) + y_pair = pd.DataFrame(y1).merge(y2, how="cross") + y_pair_diff = y_pair.iloc[:, 1] - y_pair.iloc[:, 0] + return y_pair_diff.values + + def pair_output_difference(self, + y1: Union[np.ndarray, pd.Series], + y2: Union[np.ndarray, pd.Series], + nb_classes: int) -> np.ndarray: + """For MultiClassClassification based on difference only""" + y1, y2 = self._convert_to_pandas(y1, y2) + y_pair = pd.DataFrame(y1).merge(y2, how="cross") + y_pair_diff = (y_pair.iloc[:, 1] != y_pair.iloc[:, 0]).astype(int) + assert y_pair_diff.nunique() <= 2, f'should only be 0s and 1s {y_pair_diff.unique()}' + return y_pair_diff.values + + @staticmethod + def get_pair_feature_names(features: list) -> list: + """ Get the new name of features after pairing points. 
""" + return [f'{name}_x' for name in features] + [f'{name}_y' for name in features] + + @staticmethod + def check_output(y: pd.Series) -> None: + assert y is not None + assert isinstance(y, pd.Series) + assert 'uint' not in str(y.dtype), y.dtype + assert isinstance(y, pd.Series) or y.shape[1] == 1, f"Didn't expect more than one output {y.shape}" + assert y.nunique() > 1, y.nunique() + if y.name is None: + # just put any name to the output to avoid a bug later + y.name = 'output' + + @staticmethod + def check_sample_weight(sample_weight: pd.Series, y_train: pd.Series) -> None: + if sample_weight is None: + pass + elif isinstance(sample_weight, pd.Series): + # check + if len(sample_weight) != len(y_train): + raise ValueError( + f'sample_weight size {len(sample_weight)} should be equal to the train size {len(y_train)}') + if not sample_weight.index.equals(y_train.index): + raise ValueError( + f'sample_weight and y_train must have the same index\n{sample_weight.index}\n{y_train.index}') + if all(sample_weight.fillna(0) <= 0): + raise ValueError(f'sample_weight are all negative/Nans.\n{sample_weight}') + + # norm + class_sums = np.bincount(y_train, sample_weight) + sample_weight = sample_weight / class_sums[y_train.astype(int)] + else: + raise NotImplementedError() + + @staticmethod + def correct_sample_weight(sample_weight: pd.Series, y_train: pd.Series) -> pd.Series: + if sample_weight is not None: + sample_weight = sample_weight / sum(sample_weight) + # norm + # class_sums = np.bincount(y_train, sample_weight) + # sample_weight = sample_weight / class_sums[y_train.astype(int)] + + # # if sample_weight.min() < 0: # dolla weight change : improvement +0.0032 bof + # # sample_weight = sample_weight - sample_weight.min() + return sample_weight + + @staticmethod + def predict(y_prob: np.ndarray, output_mode: str = 'default', min_label_zero: bool = True): + if output_mode.__contains__('label'): + predicted_classes = np.argmax(y_prob, axis=1)[..., np.newaxis] + 
predicted_classes = predicted_classes if min_label_zero else predicted_classes + 1 + else: + predicted_classes = y_prob + return predicted_classes + + +class PairwiseDifferenceClassifier: + """PDL have a low chance of improvement compared to using directly parametric models like Logit, MLP. \ + To obtain an improvement, it is better to use a tree-based model like: ExtraTrees""" + + def __init__(self, params: Optional[OperationParameters] = None): + self.base_model_params = deepcopy(params._parameters) + del self.base_model_params['model'] + self.base_model = SKLEARN_CLF_IMP[params.get('model', 'rf')](**self.base_model_params) + self.pde = PairwiseDifferenceEstimator() + self.is_model_have_prob_output = hasattr(self.base_model, 'predict_proba') + self.prior = None + self.use_prior = False + self.proba_aggregate_method = 'norm' + self.sample_weight_ = None + + def _check_target(self): + if self.target.min() != 0: + self.target_start_zero = False + else: + self.target_start_zero = True + + def _estimate_prior(self): + if self.prior is not None: + return self + # Calculate class priors + target = pd.DataFrame(self.target) + class_counts = target.value_counts() + class_priors = class_counts / len(self.target) + # Convert class priors to a dictionary + self.prior = class_priors.sort_index().values + + def fit(self, + input_data: InputData): + self.num_classes = input_data.num_classes + self.target = input_data.target + self.task_type = input_data.task + self.is_regression_task = self.task_type.task_type.value == 'regression' + self.classes_ = sklearn.utils.multiclass.unique_labels(input_data.target) + self.train_features = input_data.features # Store the classes seen during fit + self._estimate_prior() + self._check_target() + X_pair, _ = self.pde.pair_input(input_data.features, input_data.features) + y_pair_diff = self.pde.pair_output_difference(self.target, self.target, self.num_classes) + + self.base_model.fit(X_pair, y_pair_diff) + return self + + def 
predict_similarity_samples(self, X: pd.DataFrame, X_anchors=None) -> pd.DataFrame: + """ For each input sample, output C probabilities for each N train pair. + Beware that this function does not apply the weights at this level + """ + if X_anchors is None: + X_anchors = self.train_features + + X_pair, X_pair_sym = self.pde.pair_input(X, X_anchors) + if self.is_model_have_prob_output: + predict_proba = self.base_model.predict_proba + else: + def predict_proba(X) -> np.ndarray: + predictions = self.base_model.predict(X) + predictions = predictions.astype(int) + n_samples = len(predictions) + proba = np.zeros((n_samples, 2), dtype=float) + proba[range(n_samples), predictions] = 1. + return proba + + predictions_proba_difference: np.ndarray = predict_proba(X_pair) + predictions_proba_difference_sym: np.ndarray = predict_proba(X_pair_sym) + # np.testing.assert_array_equal(predictions_proba_difference.shape, (len(X_pair), 2)) + predictions_proba_similarity_ab = predictions_proba_difference[:, 0] + predictions_proba_similarity_ba = predictions_proba_difference_sym[:, 0] + predictions_proba_similarity = (predictions_proba_similarity_ab + predictions_proba_similarity_ba) / 2. 
+ + predictions_proba_similarity_df = pd.DataFrame(predictions_proba_similarity.reshape((-1, + len(self.train_features))), + index=pd.DataFrame(X).index, + columns=pd.DataFrame(self.train_features).index) + return predictions_proba_similarity_df + + def __predict_with_prior(self, input_data: np.ndarray, sample_weight): + tests_trains_classes_likelihood = self.predict_proba_samples(input_data) + tests_classes_likelihood = self._apply_weights(tests_trains_classes_likelihood, sample_weight) + np.finfo(tests_classes_likelihood.dtype).eps + tests_classes_likelihood = tests_classes_likelihood / tests_classes_likelihood.sum(axis=1)[:, np.newaxis] + tests_classes_likelihood = tests_classes_likelihood.clip(0, 1) + return tests_classes_likelihood + + def __predict_without_prior(self, input_data: np.ndarray, sample_weight=None): + X = pd.DataFrame(input_data) + predictions_proba_similarity_df: pd.DataFrame = pd.DataFrame(self.predict_similarity_samples(X)) + + def f(predictions_proba_similarity: pd.Series) -> pd.Series: + target = pd.Series(self.target.squeeze()) + df = pd.DataFrame( + {'start': target.reset_index(drop=True), 'similarity': predictions_proba_similarity}) + df = df.fillna(0) + mean = df.groupby('start', observed=False).mean()['similarity'] + return mean + + tests_classes_likelihood_np = predictions_proba_similarity_df.apply(f, axis='columns') + # without this normalization it should work for multiclass-multilabel + if self.proba_aggregate_method == 'norm': + tests_classes_likelihood_np = tests_classes_likelihood_np.values \ + / tests_classes_likelihood_np.values.sum(axis=-1)[:, np.newaxis] + elif self.proba_aggregate_method == 'softmax': + tests_classes_likelihood_np = softmax(tests_classes_likelihood_np, axis=-1) + return tests_classes_likelihood_np + + def predict_proba_samples(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray: + # todo add unit test with weight ==[1 1 1 ] and weights = None + if not isinstance(X, pd.DataFrame): + X = pd.DataFrame(X) + 
predictions_proba_similarity: pd.DataFrame = self.predict_similarity_samples(X) + + def g(anchor_class: np.ndarray, predicted_similarity: np.ndarray) -> np.ndarray: + """ + + :param anchor_class: array int + :param predicted_similarity: array float + :return: + """ + prior_cls_probs = (1 - self.prior[anchor_class]) + likelyhood_per_anchor = ((1 - predicted_similarity) / prior_cls_probs) + likelyhood_per_anchor = likelyhood_per_anchor * self.prior + n_samples = np.arange(len(likelyhood_per_anchor)) + likelyhood_per_anchor[n_samples, anchor_class] = predicted_similarity + return likelyhood_per_anchor + + anchor_class = self.target.astype(int) + + def f(predictions_proba_similarity: np.ndarray) -> np.ndarray: + """ Here we focus on one test point. + Given its similarity probabilities. + Return the probability for each class""" + test_i_trains_classes = g(anchor_class=anchor_class, predicted_similarity=predictions_proba_similarity) + np.testing.assert_array_equal(test_i_trains_classes.shape, (len(self.target), self.num_classes)) + return test_i_trains_classes + + tests_trains_classes_likelihood = np.apply_along_axis(f, axis=1, arr=predictions_proba_similarity.values) + return tests_trains_classes_likelihood + + def _apply_weights(self, + tests_trains_classes_likelihood: np.ndarray, + sample_weight: np.ndarray) -> np.ndarray: + tests_classes_likelihood = (tests_trains_classes_likelihood * + sample_weight[np.newaxis, :, np.newaxis]).sum(axis=1) + # np.testing.assert_array_almost_equal(tests_classes_likelihood.sum(axis=-1), 1.) 
+ return tests_classes_likelihood + + def _abstract_predict(self, + input_data: InputData, + output_mode: str = 'default'): + sample_weight = np.full(len(self.target), 1 / len(self.target)) if self.sample_weight_ is None \ + else self.sample_weight_.loc[self.target.index].values + + predict_output = Either(value=input_data.features, + monoid=[input_data.features, self.use_prior]).either( + left_function=lambda features: self.__predict_without_prior(features, sample_weight), + right_function=lambda features: self.__predict_with_prior(features, sample_weight)) + return self.pde.predict(predict_output, output_mode, self.target_start_zero) + + def predict(self, + input_data: InputData, + output_mode: str = 'labels') -> pd.Series: + """ For each input sample, output one prediction the most probable class. + + """ + return self._abstract_predict(input_data, output_mode) + + def predict_proba(self, + input_data: InputData, + output_mode: str = 'default') -> pd.Series: + """ For each input sample, output one prediction the most probable class. + + """ + + return self.predict(input_data, output_mode) + + def predict_for_fit(self, + input_data: InputData, + output_mode: str = 'default'): + """ For each input sample, output one prediction the most probable class. + """ + return self.predict(input_data, output_mode) + + def score_difference(self, input_data: InputData) -> float: + """ WE RETURN THE MAE score XD """ + y_pair_diff = self.pde.pair_output_difference(input_data.target, self.target, + self.num_classes) # 0 if similar, 1 if diff + predictions_proba_similarity: pd.DataFrame = self.predict_similarity_samples( + input_data.features, reshape=False) # 0% if different, 100% if similar + + return abs(y_pair_diff - (1 - predictions_proba_similarity)).mean() + + +class PairwiseDifferenceRegressor: + """PDL have a low chance of improvement compared to using directly parametric models like Ridge, Lasso. 
\ + To obtain an improvement, it is better to use a tree-based model like: ExtraTrees.""" + + def __init__(self, params: Optional[OperationParameters] = None): + self.base_model_params = deepcopy(params._parameters) + del self.base_model_params['model'] + self.base_model = SKLEARN_REG_IMP[params.get('model', 'treg')](**self.base_model_params) + self.pde = PairwiseDifferenceEstimator() + self.prior = None + self.use_prior = False + self.proba_aggregate_method = 'norm' + self.sample_weight_ = None + + def fit(self, + input_data: InputData): + self.num_classes = input_data.num_classes + self.target = input_data.target + self.task_type = input_data.task + self.is_regression_task = self.task_type.task_type.value == 'regression' + self.train_features = input_data.features # Store the classes seen during fit + X_pair, _, y_pair_diff = self.pde._pair_data_regression(self.train_features, + self.train_features, + self.target, + self.target) + self.base_model.fit(X_pair, y_pair_diff) + return self + + def predict(self, + input_data: InputData) -> pd.Series: + return self._abstract_predict(input_data) + + def predict_proba(self, + input_data: InputData) -> pd.Series: + return self.predict(input_data) + + def predict_for_fit(self, + input_data: InputData, + output_mode: str = 'default'): + return self.predict(input_data) + + def _predict_samples(self, input_data: InputData, force_symmetry=True): + """ + For each input sample, output N predictions (where N = the number of anchors). 
+ prediction = difference + y_train + """ + + def repeat(s: pd.Series, n_times: int): + return pd.concat([s] * n_times, ignore_index=True).values + + X = pd.DataFrame(input_data.features) + final_shape = (-1, len(self.train_features)) + # Create pairs of the new instance each anchor (training instance) + X_pair, X_pair_sym, _ = self.pde._pair_data_regression(X, self.train_features, None, None) + # Estimator predicts the difference between each anchor (training instance) and each prediction instance: + predictions_difference: np.ndarray = self.base_model.predict(X_pair) + if force_symmetry: + difference_sym: np.ndarray = self.base_model.predict(X_pair_sym) + predictions_difference = (predictions_difference - difference_sym) / 2. + + # The known y for the training instances + predictions_start: np.ndarray = repeat(pd.Series(self.target), n_times=len(X)) + # Combine the difference predicted by the model with the known y => train_y + predicted difference + predictions: np.ndarray = predictions_start + predictions_difference + # Set of absolute predictions for each anchor for each prediction instance: + prediction_samples_df = pd.DataFrame(predictions.reshape(final_shape), index=X.index) + # The predicted difference to the anchors: + pred_diff_samples_df = pd.DataFrame(predictions_difference.reshape(final_shape), index=X.index) + return prediction_samples_df, pred_diff_samples_df + + def __predict_with_weight(self, input_data, prediction_samples_df): + if isinstance(self.sample_weight_, pd.Series): + def weighted_avg(samples: pd.Series, weights: pd.Series) -> float: + weights[weights <= 0] = np.nan + summed = np.nansum(samples.multiply(weights)) + return summed / np.nansum(weights) + + prediction = prediction_samples_df.apply( + lambda samples: weighted_avg(samples, self.sample_weight_), + axis='columns' + ) + else: + self.sample_weight_[self.sample_weight_ < 0] = np.nan + summed = pd.Series(np.nansum(self.sample_weight_, axis=1), index=input_data.index) + 
self.sample_weight_ = self.sample_weight_.apply(lambda row: row / summed) + np.testing.assert_array_almost_equal(self.sample_weight_.sum(axis=1), 1.) + prediction = (prediction_samples_df * self.sample_weight_).sum(axis=1) + return prediction + + def _abstract_predict(self, input_data: InputData, force_symmetry=True) -> pd.Series: + """ For each input sample, output one prediction, the mean of the predicted samples. """ + prediction_samples_df, _ = self._predict_samples(input_data=input_data, force_symmetry=force_symmetry) + have_weights = isinstance(self.sample_weight_, pd.Series) or isinstance(self.sample_weight_, pd.DataFrame) + + predict_output = Either(value=pd.DataFrame(input_data.features), + monoid=[prediction_samples_df, have_weights]).either( + left_function=lambda features: features.mean(axis=1), + right_function=lambda init_data: self.__predict_with_weight(init_data, prediction_samples_df)) + + return predict_output.values + + def learn_anchor_weights( + self, + X_val: pd.DataFrame = None, + y_val: pd.Series = None, + X_test: pd.DataFrame = None, + method: str = 'L2', + enable_warnings=True, + **kwargs): + """ + Call this method after the training to create weights for the anchors + using the given validation data. + Use the `method` parameter to select one of the following + weighting methods: + - 'Optimize': Minimize the validation MAE using the SLSQP optimizer with a linear constraint on the sum of the weights. + - 'L1': like `Optimize` but includes L1 regularization. + - 'L2': like `Optimize` but includes L2 regularization. + - 'L1L2': like `Optimize` but includes L1 and L2 regularization. + - 'KLD': like `Optimize` but includes a KLD loss to make the weights more uniform. + - 'ExtremeWeightPruning': lik `L1` but uses high regularization strength. + - 'NegativeError': Calculate weights as the negative mean absolute error. + - 'OrderedVoting': The best of n anchors gets n votes, the worst gets 1 vote. n is the number of anchors. 
+ - 'KmeansClusterCenters': Calculate weights as the distance to the cluster centers of the KMeans algorithm. + """ + if y_val is not None: + old_validation_error = sklearn.metrics.mean_absolute_error(self.predict(X_val), y_val) + else: + old_validation_error = 0 + + if method not in self._name_to_method_mapping.keys(): + raise NotImplementedError(f"Weighting method {method} unknown! Use one of the following:" + f" '{', '.join(list(self._name_to_method_mapping.keys()))}'") + + sample_weight: pd.Series = self._name_to_method_mapping[method](X_val=X_val, y_val=y_val, X_test=X_test, + **kwargs) + assert not sample_weight.isna().any(), f'Nans values in sample_weights using {method}\n {sample_weight}' + self.set_sample_weight(sample_weight) + if y_val is not None: + new_validation_error = sklearn.metrics.mean_absolute_error(self.predict(X_val), y_val) + if new_validation_error > old_validation_error and enable_warnings: + print(f'WARNING: \t new val MAE: {new_validation_error} \t old val MAE: {old_validation_error}') + return self + + def set_sample_weight(self, sample_weight: pd.Series): + """ + Sets the weights for the anchors to the given weights in sample_weight. 
+ + :param sample_weight: The weights for the anchors as a pd.Series + :return: self (with updated weights) + """ + if sample_weight is None: + pass + elif isinstance(sample_weight, pd.Series): + if len(sample_weight) != len(self.y_train_): + raise ValueError( + f'sample_weight size {len(sample_weight)} should be equal to the train size {len(self.y_train_)}') + if not sample_weight.index.equals(self.y_train_.index): + raise ValueError( + f'sample_weight and y_train must have the same index\n{sample_weight.index}\n{self.y_train_.index}') + + if all(sample_weight.fillna(0) == 0): # All weights are 0 => Set them to 1 + sample_weight = pd.Series(1, index=self.y_train_.index) + + if all(sample_weight.fillna(0) < 0): + raise ValueError(f'sample_weight are all negative/Nans.\n{sample_weight}') + if any(pd.isna(sample_weight)): + raise ValueError(f'sample_weight contains NaNs.\n{sample_weight}') + else: + raise ValueError('sample_weight must be a pd.Series') + + self.sample_weight_ = sample_weight + return self diff --git a/fedot_ind/core/models/pdl/pairwise_transform.py b/fedot_ind/core/models/pdl/pairwise_transform.py new file mode 100644 index 000000000..c0bc1f9c4 --- /dev/null +++ b/fedot_ind/core/models/pdl/pairwise_transform.py @@ -0,0 +1,291 @@ +import functools +from typing import Iterable, Optional + +import numpy as np +import pandas as pd +import sklearn.base +from fedot.core.operations.operation_parameters import OperationParameters +from scipy.optimize import LinearConstraint, minimize +from scipy.spatial.distance import cdist +from scipy.stats import entropy +from sklearn.cluster import KMeans +from sklearn.compose import ColumnTransformer +from sklearn.utils.validation import check_is_fitted + + +class PDCDataTransformer: + """ + Transform the data so that it can be processed by PDL models. 
+ """ + preprocessing_: ColumnTransformer + preprocessing_y_: ColumnTransformer # todo fix the ColumnTransformer annotation + + def __init__(self, numeric_features: Iterable = None, + ordinal_features: Iterable = None, + string_features: Iterable = None, + y_type: str = None): + self.numeric_features = numeric_features + self.ordinal_features = ordinal_features + self.string_features = string_features + if y_type is not None and y_type not in ('numeric', 'ordinal', 'string'): + raise ValueError(f"y_type must be one of 'numeric', 'ordinal', 'string' but got {y_type}") + self.y_type = y_type + + def fit(self, X, y=None): + + # y = y.astype('category').cat.codes.astype(np.float32) # todo since I + # cannot transform the output at least add raise type error on it + if self.numeric_features is None and self.ordinal_features is None and self.string_features is None: + self.numeric_features = [] + self.ordinal_features = [] # todo fix name, will be processed a ordinal + self.string_features = [] + for column in X.columns: + dtype = X[column].dtype + if pd.api.types.is_numeric_dtype(dtype): + self.numeric_features.append(column) + elif isinstance(dtype, pd.CategoricalDtype): + if dtype.ordered: + self.ordinal_features.append(column) # ordinal... 
+ else: + self.string_features.append(column) + elif pd.api.types.is_bool_dtype(dtype): # pd.api.types.is_categorical_dtype(dtype) deprecated + self.string_features.append(column) + elif pd.api.types.is_string_dtype(dtype): + self.string_features.append(column) + + X, _ = self.cast_uint(X) + if self.y_type == 'numeric': + from sklearn.preprocessing import StandardScaler + self.preprocessing_y_ = StandardScaler() + elif self.y_type == 'ordinal': # string + from sklearn.preprocessing import OrdinalEncoder + self.preprocessing_y_ = OrdinalEncoder() + elif self.y_type == 'string': + from sklearn.preprocessing import OneHotEncoder + self.preprocessing_y_ = OneHotEncoder() + + if y is not None and self.preprocessing_y_ is not None: + if isinstance(y, pd.Series): + y = pd.DataFrame(y) + self.preprocessing_y_.fit(y) + + return self + + def cast_uint(self, X: pd.DataFrame, y: pd.Series = None): + numeric_cols = X.select_dtypes(include=['number']).columns + X.loc[:, numeric_cols] = X[numeric_cols].astype('float32') + if y is not None: + y = y.astype('float32') + return X, y + + def transform(self, X, y=None): + check_is_fitted(self) + X, _ = self.cast_uint(X) + X = pd.DataFrame(self.preprocessing_.transform(X)) + from scipy.sparse import csr_matrix + if any(isinstance(e, csr_matrix) for e in X.values.flatten()): + raise NotImplementedError('error in data \t X contains sparse features (csr_matrix)') + X = X.dropna(axis=1, how='all') # Drop columns with all NaN values + X = X.astype(np.float32) + + if len(X.columns) == 0: + raise ValueError('error in data \t X no features left after pre-processing') + # if X.isna().any().any(): + # raise NotImplementedError('error in data \t Some features are NaNs in the X set') + if any(x in pd.Series(X.values.flatten()).apply(type).unique() for x in + ('csr_matrix', 'date',)): # todo think about adding 'str' + raise NotImplementedError('error in data \t Dataset contains sparse data') + + if y is not None and self.preprocessing_ is not None: 
+ y = pd.Series(self.preprocessing_.transform(y), name='y') + if y is None: + return X.values + return X.values, y.values + + +class SampleWeights: + def __init__(self, params: Optional[OperationParameters] = None): + # Save information about the weighting methods as here for better availability + self.method = params.get('method', 'L2') + + self.method_dict = { + # Optimization based methods: + 'L2': functools.partial(self._sample_weight_optimize, l2_lambda=0.1), + 'KLD': functools.partial(self._sample_weight_optimize, kld_lambda=0.05), + 'Optimize': self._sample_weight_optimize, + 'L1L2': functools.partial(self._sample_weight_optimize, l1_lambda=0.05, l2_lambda=0.025), + 'L1': functools.partial(self._sample_weight_optimize, l1_lambda=0.1), + 'ExtremeWeightPruning': self._sample_weight_extreme_pruning, + # Heuristic methods + 'NegativeError': self._sample_weight_negative_error, + 'InverseError': self._sample_weight_inverse_error, + 'OrderedVoting': self._sample_weight_ordered_votes, + # Other Methods: + 'KMeansClusterCenters': self._sample_weight_by_kmeans_prototypes, + } + + def _normalize_weights(self, weights: np.ndarray) -> pd.Series: + """ + Normalize the weights to be between 0 and 1 + :param weights: The weights to be normalized as a pd.Series + """ + if all(np.isclose(weights, weights.values[0])): + weights = pd.Series(1., index=weights.index) + assert weights.min() >= 0, f'Negative weights found: {weights[weights < 0]}' + weights /= weights.sum() + return weights + + def __objective_function(self, + weights: np.ndarray, + pred_val_samples_np: np.ndarray, + y_val: np.ndarray, + initial_mae: float, + kld_lambda=0., + l1_lambda=0., + l2_lambda=0.) 
-> float: + assert kld_lambda >= 0, f'kld_lambda should be >=0, got {kld_lambda}' + assert l1_lambda >= 0, f'l1_lambda should be >=0, got {l1_lambda}' + assert l2_lambda >= 0, f'l2_lambda should be >=0, got {l2_lambda}' + assert initial_mae >= 0, f'initial_mae should be >=0, got {initial_mae}' + + predictions = np.matmul(pred_val_samples_np, weights / sum(weights)) + mae = sklearn.metrics.mean_absolute_error(y_val, predictions) + + regularisation = 0 + if kld_lambda > 0: + train_size = len(weights) + weights_initial_guess = np.ones(train_size) / train_size + regularisation += kld_lambda * entropy(weights, weights_initial_guess) / train_size + if l1_lambda > 0: + regularisation += l1_lambda * (np.linalg.norm(weights, ord=1) - max(weights)) + if l2_lambda > 0: + regularisation += l2_lambda * np.linalg.norm(weights, ord=2) + + regularisation *= initial_mae + loss = mae + regularisation + return loss + + def _sample_weight_optimize(self, X_val: pd.DataFrame, y_val: pd.Series, kld_lambda=0., l1_lambda=0., l2_lambda=0., + **kwargs) -> pd.Series: + """ + Minimize the validation MAE using SLSQP optimizer + with a linear constraint on the sum of the weights. + + :param X_val: + :param y_val: + :param kld_lambda: alpha=0.01 i.e. I am ready to loose 1% of the validation MAE to make the solution more general + :return: + """ + prediction_samples_df, _ = self._predict_samples(X_val) + pred_val_samples_np = prediction_samples_df.values + train_size = len(self.X_train_) + weights_initial_guess = np.ones(train_size) / train_size + initial_mae = sklearn.metrics.mean_absolute_error(y_val, np.matmul(pred_val_samples_np, weights_initial_guess)) + + def objective_function(weights: np.ndarray) -> float: + return self.__objective_function(weights=weights, pred_val_samples_np=pred_val_samples_np, y_val=y_val, + initial_mae=initial_mae, kld_lambda=kld_lambda, l1_lambda=l1_lambda, + l2_lambda=l2_lambda) + + variable_bounds = [(0., 1.) 
for _ in range(train_size)] + sum_constraint = LinearConstraint(np.ones(train_size), lb=1, ub=1) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + result = minimize(objective_function, weights_initial_guess, method='SLSQP', bounds=variable_bounds, + constraints=[sum_constraint]) + # Extract the solution + optimal_weight = result.x + + # print("the optimal solution:", optimal_weight) + # print("Optimal Objective Value, i.e. new log loss validation error:", result.fun) + sample_weights = pd.Series(optimal_weight, index=self.X_train_.index) + return sample_weights + + def _sample_weight_extreme_pruning(self, X_val: pd.DataFrame, y_val: pd.Series, **kwargs) -> pd.Series: + l1 = 0.8 + while l1 > 0.0001: + weights = self._sample_weight_optimize(X_val=X_val, y_val=y_val, l1_lambda=l1) + if sum(weights == 0) / len(weights) > .9: + l1 *= 0.5 + else: + break + return weights + + def _error(self, X_val: pd.DataFrame, y_val: pd.Series, **kwargs) -> pd.Series: + """ + Calculate the Mean Absolute Error for each anchor. + :param X_val: + :param y_val: + :param kwargs: + :return: + """ + pred_val_samples, _ = self._predict_samples(X_val) + errors = pred_val_samples.apply(lambda one_val_samples: abs(y_val - one_val_samples), axis=0) + val_mae = errors.mean() + np.testing.assert_array_equal(val_mae.index, self.X_train_.index) + return val_mae + + def _sample_weight_inverse_error(self, X_val: pd.DataFrame, y_val: pd.Series, **kwargs) -> pd.Series: + val_mae = self._error(X_val=X_val, y_val=y_val) + sample_weights = 1. 
/ (val_mae + 0.0001) + sample_weights = sample_weights / sample_weights.sum() + return sample_weights + + def _sample_weight_negative_error(self, X_val: pd.DataFrame, y_val: pd.Series, **kwargs) -> pd.Series: + uniform_weights = pd.Series([1 / len(self.X_train_)] * len(self.X_train_), index=self.X_train_.index) + val_mae = self._error(X_val=X_val, y_val=y_val) + if sum(val_mae) == 0: + return uniform_weights + sample_weights = ((-val_mae) + max(val_mae)) / sum(val_mae) + if sum(sample_weights) == 0: + return uniform_weights + sample_weights = sample_weights / sample_weights.sum() + return sample_weights + + @staticmethod + def _sample_weight_ordered_votes_from_weights(received_weights): + errors = - received_weights + k = len(errors) + ranks = np.argsort(np.argsort(errors)) + 1 + weights = (k - ranks + 1) / (k * (k + 1) / 2) + return weights + + def _sample_weight_ordered_votes(self, X_val, y_val, force_symmetry=True, **kwargs): + """ + The best of n anchors gets n votes, the worst gets 1 vote. n is the nb of anchors. Uses the _sample_weight_negative_error function + for distribution votes. + works quite good + :param force_symmetry: Sets the force_symmetry parameter of the prediction function + :return: The weights as np.NDarray + """ + weights = self._sample_weight_negative_error(X_val, y_val, force_symmetry=force_symmetry) + return self._sample_weight_ordered_votes_from_weights(weights) + + def _sample_weight_by_kmeans_prototypes(self, k=None, **kwargs): + """ + Use KMeans to cluster the train data. Use the k centroids/prototypes found by knn as weights. + We keep only K anchors that are the prototypes. all other anchors receive a weight of 0 + + :param force_symmetry: Sets the force_symmetry parameter of the prediction function + :param k: The number of prototypes to use. 
If None, 10% of the training set is used as prototypes + :return: The weights as np.NDarray + """ + if not k: + k = max(int(len(self.X_train_) / 10), 3) # 10% and min 3 of the training set data points is used as weights + + kmeans = KMeans(n_clusters=k, n_init="auto", random_state=0) + kmeans.fit(self.X_train_) + + cluster_centers = kmeans.cluster_centers_ # Get the cluster centers (prototypical data points) + distances = cdist(self.X_train_, cluster_centers) # distance between each data point and each cluster center + closest_indices = np.argmin(distances, axis=0) # Get the index of the closest data points to the clusters + + # Create an array to mark the closest data points + closest_array = np.zeros(len(self.X_train_)) + closest_array[closest_indices] = 1 / k + + s = pd.Series(closest_array, index=self.X_train_.index) + s = s.fillna(0) # I don't know why there are NaNs rather than 0s + assert not s.isna().any(), f'Nans values in sample_weights using KMeans\n {s}' + return s diff --git a/fedot_ind/core/operation/decomposition/matrix_decomposition/column_sampling_decomposition.py b/fedot_ind/core/operation/decomposition/matrix_decomposition/column_sampling_decomposition.py index ae2055be6..61b5d7d07 100644 --- a/fedot_ind/core/operation/decomposition/matrix_decomposition/column_sampling_decomposition.py +++ b/fedot_ind/core/operation/decomposition/matrix_decomposition/column_sampling_decomposition.py @@ -1,29 +1,48 @@ -from typing import Tuple +from typing import Tuple, Union, Optional +from fedot.core.operations.operation_parameters import OperationParameters from numpy import linalg as LA from sklearn import preprocessing from sklearn.random_projection import johnson_lindenstrauss_min_dim from fedot_ind.core.architecture.settings.computational import backend_methods as np +from fedot_ind.core.repository.constanst_repository import DEFAULT_SVD_SOLVER + +RANK_REPRESENTATION = Union[int, float] class CURDecomposition: - def __init__(self, rank, - return_samples: 
bool = True): - self.selection_rank = None - self.return_samples = return_samples - if not self.return_samples: - self.rank = min(20000, rank) - else: - self.rank = rank + """ + CUR decomposition is a low-rank matrix decomposition method that is based on selecting + a subset of columns and rows of the original matrix. The method is based on the + Johnson-Lindenstrauss lemma and is used to approximate the original matrix with a + low-rank matrix. The CUR decomposition is defined as follows: + A = C @ U @ R + where A is the original matrix, C is a subset of columns of A, U is a subset of rows of A, + and R is a subset of rows of A. The selection of columns and rows is based on the + probabilities p and q, which are computed based on the norms of the columns and rows of A. + The selection of columns and rows is done in such a way that the approximation error is minimized. + + Args: + params: the parameters of the operation + rank: the rank of the decomposition + tolerance: the tolerance of the decomposition + return_samples: whether to return the samples or the decomposition matrices + + """ + + def __init__(self, params: Optional[OperationParameters] = None): + self.selection_rank = params.get('rank', None) + self.tolerance = params.get('tolerance', [0.5, 0.1, 0.05]) + self.return_samples = params.get('return_samples', True) self.column_indices = None self.row_indices = None self.column_space = 'Full' - @staticmethod - def _get_selection_rank(matrix): + def _get_selection_rank(self, matrix): """ - Compute the selection rank for the CUR decomposition. It must be at least 4 times the rank of the matrix but not + Compute the selection rank for the CUR decomposition. + It must be at least 4 times the rank of the matrix but not greater than the number of rows or columns of the matrix. 
Args: @@ -32,9 +51,8 @@ def _get_selection_rank(matrix): Returns: the selection rank """ - tol = [0.5, 0.1, 0.05] n_samples = max(matrix.shape) - min_num_samples = johnson_lindenstrauss_min_dim(n_samples, eps=tol).tolist() + min_num_samples = johnson_lindenstrauss_min_dim(n_samples, eps=self.tolerance).tolist() return max([x if x < n_samples else n_samples for x in min_num_samples]) def get_aproximation_error(self, original_tensor, cur_matrices: tuple): @@ -49,7 +67,8 @@ def fit_transform(self, feature_tensor: np.ndarray, target: np.ndarray = None) -> tuple: feature_tensor = feature_tensor.squeeze() # transformer = random_projection.SparseRandomProjection().fit_transform(target) - self.selection_rank = self._get_selection_rank(feature_tensor) + if self.selection_rank is None: + self.selection_rank = self._get_selection_rank(feature_tensor) self._balance_target(target) # create sub matrices for CUR-decompostion array = np.array(feature_tensor.copy()) @@ -59,7 +78,7 @@ def fit_transform(self, feature_tensor: np.ndarray, sampled_tensor = sampled_tensor[self.row_indices, :] else: # evaluate pseudoinverse for W - U^-1 - X, Sigma, y_T = np.linalg.svd(w, full_matrices=False) + X, Sigma, y_T = DEFAULT_SVD_SOLVER(w, full_matrices=False) Sigma_plus = np.linalg.pinv(np.diag(Sigma)) # aprox U using pseudoinverse u = y_T.T @ Sigma_plus @ Sigma_plus @ X.T @@ -69,6 +88,11 @@ def fit_transform(self, feature_tensor: np.ndarray, target = target[self.row_indices] return sampled_tensor, target + def transform(self, feature_tensor: np.ndarray, + target: np.ndarray = None) -> tuple: + + return self.fit_transform(feature_tensor, target) + def reconstruct_basis(self, C, U, R, ts_length): # if len(U.shape) > 1: # multi_reconstruction = lambda x: self.reconstruct_basis(C=C, U=U, R=x, ts_length=ts_length) @@ -93,7 +117,8 @@ def select_rows_cols( # Compute the probabilities for selecting columns and rows col_probs, row_probs = col_norms / matrix_norm, row_norms / matrix_norm - + if 
isinstance(self.selection_rank, float): + self.selection_rank = round(max(matrix.shape) * self.selection_rank) is_matrix_tall = self.selection_rank > matrix.shape[1] col_rank = self.selection_rank if not is_matrix_tall or self.column_space == 'Full' \ else len([prob for prob in col_probs if prob > 0.01]) @@ -134,47 +159,3 @@ def matrix_to_ts(matrix: np.ndarray) -> np.ndarray: for i in range(matrix.shape[0]): ts[i:i + matrix.shape[1]] += matrix[i] return ts - - -def get_random_sparse_matrix(size: tuple): - """Generate random sparse matrix with size = size""" - - matrix = np.zeros(size) - for i in range(size[0]): - for j in range(size[1]): - if np.random.rand() < 0.1: - matrix[i, j] = np.random.rand() - return matrix - - -if __name__ == '__main__': - from fedot_ind.tools.loader import DataLoader - - arr = np.array([[1, 1, 1, 0, 0], - [3, 3, 3, 0, 0], - [4, 4, 4, 0, 0], - [5, 5, 5, 0, 0], - [0, 0, 0, 4, 4], - [0, 0, 0, 5, 5], - [0, 0, 0, 2, 2]]) - - (X_train, y_train), (X_test, y_test) = DataLoader('Lightning7').load_data() - - # init_ts = train[0].iloc[0, :].values - # scaler = MinMaxScaler() - # scaler.fit(init_ts.reshape(-1, 1)) - # single_ts = scaler.transform(init_ts.reshape(-1, 1)).reshape(-1) - - cur = CURDecomposition(rank=20) - # M = cur.ts_to_matrix(single_ts, 30) - C, U, R = cur.fit_transform(X_train) - basis = cur.reconstruct_basis(C, U, R, X_train.shape[1]) - - # rec_ts = cur.matrix_to_ts(C @ U @ R) - # err = np.linalg.norm(single_ts - rec_ts) - - # plt.plot(init_ts, label='init_ts') - # plt.plot(scaler.inverse_transform(rec_ts.reshape(-1, 1)), label='rec_ts') - # plt.legend() - # plt.show() - _ = 1 diff --git a/fedot_ind/core/operation/decomposition/matrix_decomposition/dmd_decomposition.py b/fedot_ind/core/operation/decomposition/matrix_decomposition/dmd_decomposition.py index 27af8bb8f..7f82f25c7 100644 --- a/fedot_ind/core/operation/decomposition/matrix_decomposition/dmd_decomposition.py +++ 
b/fedot_ind/core/operation/decomposition/matrix_decomposition/dmd_decomposition.py @@ -1,10 +1,10 @@ from fedot_ind.core.architecture.settings.computational import backend_methods as np -from numpy.linalg import svd +from fedot_ind.core.repository.constanst_repository import DEFAULT_SVD_SOLVER, DEFAULT_QR_SOLVER def rq(A): n, m = A.shape - Q, R = np.linalg.qr(np.flipud(A).T, mode='complete') + Q, R = DEFAULT_QR_SOLVER(np.flipud(A).T, mode='complete') R = np.rot90(R.T, 2) Q = np.flipud(Q.T) if n > m: @@ -18,7 +18,7 @@ def tls(A, B): if A.shape[0] != B.shape[0]: raise ValueError('Matrices are not conformant.') R1 = np.hstack((A, B)) - U, S, V = np.linalg.svd(R1) + U, S, V = DEFAULT_SVD_SOLVER(R1) r = B.shape[1] R, Q = rq(V[:, r:]) Gamma = R[n:, n - r:] @@ -28,7 +28,7 @@ def tls(A, B): def exact_dmd_decompose(X, Y, rank): - Ux, Sx, Vx = svd(X) + Ux, Sx, Vx = DEFAULT_SVD_SOLVER(X) Ux = Ux[:, :rank] Sx = Sx[:rank] Sx = np.diag(Sx) @@ -46,14 +46,14 @@ def A(v): return np.dot(a=Ux, b=np.dot(a=Atilde, b=np.dot(a=Ux.T, b=v))) def orthogonal_dmd_decompose(X, Y, rank): - Ux, _, _ = svd(X) + Ux, _, _ = DEFAULT_SVD_SOLVER(X) Ux = Ux[:, :rank] # Project X (current state) and Y (future state) on leading components of X Yproj = Ux.T @ Y Xproj = Ux.T @ X # A_proj is constrained to be a unitary matrix and the minimization problem is argmin (A.T @ A = I) |Y-AX|_frob # The solution of A_proj is obtained by Schonemann A = Uyx,@ Vyx.T - Uyx, _, Vyx = svd(Yproj @ Xproj.T) + Uyx, _, Vyx = DEFAULT_SVD_SOLVER(Yproj @ Xproj.T) Aproj = Uyx @ Vyx.T def A(x): return np.dot(a=Ux, b=np.dot(a=Aproj, b=np.dot(a=Ux.T, b=x))) # Diagonalise unitary operator @@ -65,7 +65,7 @@ def A(x): return np.dot(a=Ux, b=np.dot(a=Aproj, b=np.dot(a=Ux.T, b=x))) def symmetric_decompose(X, Y, rank): - Ux, S, V = np.linalg.svd(X) + Ux, S, V = DEFAULT_SVD_SOLVER(X) C = np.dot(Ux.T, np.dot(Y, V)) C1 = C if rank is None: diff --git 
a/fedot_ind/core/operation/decomposition/matrix_decomposition/power_iteration_decomposition.py b/fedot_ind/core/operation/decomposition/matrix_decomposition/power_iteration_decomposition.py index 1846d3e80..801f5ce9f 100644 --- a/fedot_ind/core/operation/decomposition/matrix_decomposition/power_iteration_decomposition.py +++ b/fedot_ind/core/operation/decomposition/matrix_decomposition/power_iteration_decomposition.py @@ -7,9 +7,31 @@ from fedot_ind.core.operation.filtration.channel_filtration import _detect_knee_point from fedot_ind.core.operation.transformation.regularization.spectrum import singular_value_hard_threshold, \ sv_to_explained_variance_ratio, eigencorr_matrix +from fedot_ind.core.repository.constanst_repository import DEFAULT_SVD_SOLVER, DEFAULT_QR_SOLVER class RSVDDecomposition: + """Randomized SVD decomposition with power iteration method. + Implements the block Krylov subspace method for computing the SVD of a matrix with a low computational cost. + The method is based on the power iteration procedure, which allows us to obtain a low-rank approximation of the + matrix. The method is based on the following steps: + 1. Random projection of the matrix. + 2. Transformation of the initial matrix to the Gram matrix. + 3. Power iteration procedure. + 4. Orthogonalization of the resulting "sampled" matrix. + 5. Projection of the initial Gram matrix on the new basis obtained from the "sampled matrix". + 6. Classical svd decomposition with the chosen type of spectrum thresholding. + 7. Compute matrix approximation and choose a new low_rank. + 8. Return matrix approximation. + + Args: + params: dictionary with parameters for the operation: + rank: rank of the matrix approximation + power_iter: polynom degree for power iteration procedure + sampling_share: percent of sampling columns. By default - 70% + + """ + def __init__(self, params: Optional[OperationParameters] = {}): self.rank = params.get('rank', 1) # Polynom degree for power iteration procedure. 
@@ -81,7 +103,7 @@ def rsvd(self, # thresholding if not approximation: # classic svd decomposition - Ut, St, Vt = np.linalg.svd(tensor, full_matrices=False) + Ut, St, Vt = DEFAULT_SVD_SOLVER(tensor, full_matrices=False) # Compute low rank. low_rank = self._spectrum_regularization(St, reg_type=reg_type) if regularized_rank is not None: @@ -110,14 +132,13 @@ def rsvd(self, AAT, self.poly_deg) @ tensor @ self.random_projection # Fourth step. Orthogonalization of the resulting "sampled" matrix # creates for us a basis of eigenvectors. - sampled_tensor_orto, _ = np.linalg.qr( - sampled_tensor, mode='reduced') + sampled_tensor_orto, _ = DEFAULT_QR_SOLVER(sampled_tensor, mode='reduced') # Fifth step. Project initial Gramm matrix on new basis obtained # from "sampled matrix". M = sampled_tensor_orto.T @ AAT @ sampled_tensor_orto # Six step. Classical svd decomposition with choosen type of # spectrum thresholding - Ut, St, Vt = np.linalg.svd(M, full_matrices=False) + Ut, St, Vt = DEFAULT_SVD_SOLVER(M, full_matrices=False) # Compute low rank. low_rank = self._spectrum_regularization(St, reg_type=reg_type) # Seven step. Compute matrix approximation and choose new low_rank @@ -127,6 +148,6 @@ def rsvd(self, # Eight step. Return matrix approximation. 
reconstr_tensor = self._compute_matrix_approximation( Ut, sampled_tensor_orto, tensor, regularized_rank) - U_, S_, V_ = np.linalg.svd(reconstr_tensor, full_matrices=False) + U_, S_, V_ = DEFAULT_SVD_SOLVER(reconstr_tensor, full_matrices=False) return [U_, S_, V_] diff --git a/fedot_ind/core/operation/filtration/feature_filtration.py b/fedot_ind/core/operation/filtration/feature_filtration.py index 62ac1eca9..968c1d2d0 100644 --- a/fedot_ind/core/operation/filtration/feature_filtration.py +++ b/fedot_ind/core/operation/filtration/feature_filtration.py @@ -121,8 +121,11 @@ def filter_signal(self, data): class FeatureSpaceReducer: + def __init__(self): + self.is_fitted = False + self.feature_mask = None - def reduce_feature_space(self, features: pd.DataFrame, + def reduce_feature_space(self, features: np.array, var_threshold: float = 0.01, corr_threshold: float = 0.98) -> pd.DataFrame: """Method responsible for reducing feature space. @@ -136,43 +139,30 @@ def reduce_feature_space(self, features: pd.DataFrame, Dataframe with reduced feature space. 
""" - features.shape[1] - - features = self._drop_stable_features(features, var_threshold) + features = self._drop_constant_features(features, var_threshold) features_new = self._drop_correlated_features(corr_threshold, features) + self.is_fitted = True return features_new def _drop_correlated_features(self, corr_threshold, features): - features_corr = features.corr(method='pearson') - mask = np.ones(features_corr.columns.size) - \ - np.eye(features_corr.columns.size) - df_corr = mask * features_corr - drops = [] - for col in df_corr.columns.values: - # continue if the feature is already in the drop list - if np.in1d([col], drops): - continue - - index_of_corr_feature = df_corr[abs( - df_corr[col]) > corr_threshold].index - drops = np.union1d(drops, index_of_corr_feature) - - if len(drops) == 0: - self.logger.info('No correlated features found') - return features - - features_new = features.copy() - features_new.drop(drops, axis=1, inplace=True) - return features_new - - def _drop_stable_features(self, features, var_threshold): + features_corr = np.corrcoef(features.squeeze().T) + n_features = features_corr.shape[0] + identity_matrix = np.eye(n_features) + features_corr = features_corr - identity_matrix + correlation_mask = abs(features_corr) > corr_threshold + correlated_features = list(set(np.where(correlation_mask)[0])) + percent_of_filtred_feats = (1 - (n_features - len(correlated_features)) / n_features) * 100 + return features if percent_of_filtred_feats > 50 else features + + def _drop_constant_features(self, features, var_threshold): try: + is_2d_data = len(features.shape) <= 2 variance_reducer = VarianceThreshold(threshold=var_threshold) - variance_reducer.fit_transform(features) - unstable_features_mask = variance_reducer.get_support() - features = features.loc[:, unstable_features_mask] + variance_reducer.fit_transform(features.squeeze()) + self.feature_mask = variance_reducer.get_support() + features = features[:, :, self.feature_mask] if not is_2d_data 
else features[:, self.feature_mask] except ValueError: - self.logger.info( + print( 'Variance reducer has not found any features with low variance') return features diff --git a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py index ba179caac..3664f83a1 100644 --- a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py +++ b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py @@ -21,6 +21,17 @@ class MultiDimPreprocessingStrategy(EvaluationStrategy): + """ + Class for preprocessing operations that can be used for multi-dimensional data. + + Args: + operation_impl: operation implementation + operation_type: operation type + params: operation parameters + mode: mode of operation. Can be 'one_dimensional', 'channel_independent' or 'multi_dimensional' + + """ + def __init__(self, operation_impl, operation_type: str, params: Optional[OperationParameters] = None, diff --git a/fedot_ind/core/operation/transformation/basis/abstract_basis.py b/fedot_ind/core/operation/transformation/basis/abstract_basis.py index 9679bf777..9008891e0 100644 --- a/fedot_ind/core/operation/transformation/basis/abstract_basis.py +++ b/fedot_ind/core/operation/transformation/basis/abstract_basis.py @@ -1,11 +1,12 @@ from typing import Optional, Union +import dask import pandas as pd from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters -from joblib import delayed, Parallel from pymonad.either import Either from pymonad.list import ListMonad +from tqdm.dask import TqdmCallback from fedot_ind.core.architecture.preprocessing.data_convertor import DataConverter, NumpyConverter from fedot_ind.core.architecture.settings.computational import backend_methods as np @@ -73,11 +74,10 @@ def _transform(self, """ features = DataConverter(data=input_data).convert_to_monad_data() - parallel = 
Parallel(n_jobs=self.n_processes, - verbose=0, pre_dispatch="2*n_jobs") - v = parallel(delayed(self._transform_one_sample)(sample) - for sample in features) - predict = NumpyConverter(data=np.array(v)).convert_to_torch_format() + evaluation_results = list(map(lambda sample: self._transform_one_sample(sample), features)) + with TqdmCallback(desc=f"compute_transformation_to_{self.__repr__()}"): + evaluation_results = dask.compute(*evaluation_results) + predict = NumpyConverter(data=np.array(evaluation_results)).convert_to_torch_format() return predict def _get_multidim_basis(self, input_data): diff --git a/fedot_ind/core/operation/transformation/basis/eigen_basis.py b/fedot_ind/core/operation/transformation/basis/eigen_basis.py index 0fba84dcb..4d6824650 100644 --- a/fedot_ind/core/operation/transformation/basis/eigen_basis.py +++ b/fedot_ind/core/operation/transformation/basis/eigen_basis.py @@ -1,13 +1,14 @@ from typing import Optional +import dask import tensorly as tl from fedot.core.data.data import InputData, OutputData from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.dataset_types import DataTypesEnum -from joblib import delayed, Parallel from pymonad.either import Either from pymonad.list import ListMonad from tensorly.decomposition import parafac +from tqdm.dask import TqdmCallback from fedot_ind.core.architecture.preprocessing.data_convertor import DataConverter, NumpyConverter from fedot_ind.core.architecture.settings.computational import backend_methods as np @@ -45,22 +46,19 @@ def __repr__(self): def _channel_decompose(self, features): number_of_dim = list(range(features.shape[1])) + one_dim_predict = len(number_of_dim) == 1 predict = [] if self.SV_threshold is None: self.SV_threshold = self.get_threshold(data=features) self.logging_params.update({'SV_thr': self.SV_threshold}) - - if len(number_of_dim) == 1: - predict = [self._transform_one_sample( - signal) for signal in features[:, 0, :]] - predict 
= [[np.array(v) if len(v) > 1 else v[0] for v in predict]] + if one_dim_predict: + evaluation_results = list(map(lambda sample: self._transform_one_sample(sample), features[:, 0, :])) else: - for dimension in number_of_dim: - parallel = Parallel(n_jobs=self.n_processes, - verbose=0, pre_dispatch="2*n_jobs") - v = parallel(delayed(self._transform_one_sample)(sample) - for sample in features[:, dimension, :]) - predict.append(np.array(v) if len(v) > 1 else v[0]) + evaluation_results = list(map(lambda dimension: [self._transform_one_sample(sample) + for sample in features[:, dimension, :]], number_of_dim)) + with TqdmCallback(desc=fr"compute_feature_extraction_with_{self.__repr__()}"): + feature_matrix = dask.compute(*evaluation_results) + predict = [[np.array(v) if len(v) > 1 else v[0] for v in feature_matrix]] return predict def _convert_basis_to_predict(self, basis, input_data): @@ -149,26 +147,24 @@ def data_driven_basis(Monoid): return ListMonad(reconstruct_basis( return basis def get_threshold(self, data) -> int: - svd_numbers = [] + number_of_dim = list(range(data.shape[1])) + one_dim_predict = len(number_of_dim) == 1 def mode_func(x): return max(set(x), key=x.count) - number_of_dim = list(range(data.shape[1])) - if len(number_of_dim) == 1: - svd_numbers = [self._transform_one_sample( - signal, svd_flag=True) for signal in data[:, 0, :]] - if len(svd_numbers) == 0: - raise ValueError('Error in spectrum calculation') + if one_dim_predict: + svd_numbers = list(map(lambda sample: + self._transform_one_sample(sample, svd_flag=True), data[:, 0, :])) else: - for dimension in number_of_dim: - dimension_rank = [] - for signal in data[:, dimension, :]: - dimension_rank.append( - self._transform_one_sample(signal, svd_flag=True)) - svd_numbers.append(mode_func(dimension_rank)) - return mode_func(svd_numbers) - + dimension_rank = [] + svd_numbers = list(map(lambda dimension: + [dimension_rank.append(self._transform_one_sample(signal, svd_flag=True)) + for signal in 
data[:, dimension, :]], number_of_dim)) + rank = dask.compute(*svd_numbers) + return mode_func(rank) + + @dask.delayed def _transform_one_sample(self, series: np.array, svd_flag: bool = False): window_size = round(series.shape[0] * (self.window_size / 100)) trajectory_transformer = HankelMatrix( diff --git a/fedot_ind/core/operation/transformation/basis/fourier.py b/fedot_ind/core/operation/transformation/basis/fourier.py index 0660253a7..b7dfbd5a9 100644 --- a/fedot_ind/core/operation/transformation/basis/fourier.py +++ b/fedot_ind/core/operation/transformation/basis/fourier.py @@ -1,5 +1,6 @@ from typing import Optional +import dask import pandas as pd from fedot.core.operations.operation_parameters import OperationParameters from matplotlib import pyplot as plt @@ -34,11 +35,32 @@ def __init__(self, params: Optional[OperationParameters] = None): self.min_rank = params.get('low_rank', 5) self.estimator = SPECTRUM_ESTIMATORS[params.get('estimator', 'eigen')] + self.return_feature_vector = params.get('compute_heuristic_representation', False) self.basis = None self.filtred_signal = None self.logging_params.update({'threshold': self.threshold}) + def _compute_heuristic_features(self, input_data): + periodogram_class = SPECTRUM_ESTIMATORS['non_parametric'] + estimator = periodogram_class(data=input_data, sampling=self.sampling_rate) + fft = estimator.psd + # freq, fft = periodogram(input_data[None, :], + # fs=self.sampling_rate, + # window='hann', + # detrend=False, return_onesided=True, scaling='spectrum', axis=1) + fft_mean = fft.mean() + fft_var = fft.var() + fft_rms = np.sqrt(np.mean(fft ** 2)) + fft_peak_value = fft.max() + fft_peak_freq = fft[np.argmax(fft)] + fft_energy = np.sum(fft) + # features['fft_energy_db'] = 10 * np.log10(fft).sum(axis=1) + fft_crest_factor = fft_peak_value / fft_rms + feature_vector = [fft_mean, fft_var, fft_rms, fft_peak_value, fft_peak_freq, fft_energy, fft_crest_factor] + feature_vector = [round(x, 3) for x in feature_vector] + 
return np.array(feature_vector) + def _visualise_spectrum(self, estimator): import matplotlib matplotlib.use('TkAgg') @@ -59,6 +81,8 @@ def _decompose_signal(self, input_data): estimator = self._build_spectrum(input_data) # self._visualise_spectrum(estimator) psd = estimator.psd + if self.return_feature_vector: + return self._compute_heuristic_features(input_data) dominant_freq = np.where(psd >= np.quantile(psd, q=self.threshold))[0] if self.approximation == 'exact': psd[dominant_freq] = 0 @@ -67,5 +91,6 @@ def _decompose_signal(self, input_data): self.filtred_signal = psd if self.output_format == 'spectrum' else np.fft.irfft(psd).reshape(1, -1) return self.filtred_signal + @dask.delayed def _transform_one_sample(self, series: np.array): return self._get_basis(series) diff --git a/fedot_ind/core/operation/transformation/basis/wavelet.py b/fedot_ind/core/operation/transformation/basis/wavelet.py index f9dd744c7..0ff77272f 100644 --- a/fedot_ind/core/operation/transformation/basis/wavelet.py +++ b/fedot_ind/core/operation/transformation/basis/wavelet.py @@ -1,5 +1,6 @@ from typing import Optional, Tuple +import dask import pywt from fedot.core.operations.operation_parameters import OperationParameters from pymonad.either import Either @@ -25,25 +26,45 @@ def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.n_components = params.get('n_components') self.wavelet = params.get('wavelet') + self.use_low_freq = params.get('low_freq', False) + self.scales = params.get('scale', WAVELET_SCALES) self.basis = None self.discrete_wavelets = DISCRETE_WAVELETS self.continuous_wavelets = CONTINUOUS_WAVELETS - self.scales = WAVELET_SCALES + self.return_feature_vector = params.get('compute_heuristic_representation', False) def __repr__(self): return 'WaveletBasisImplementation' + def _compute_heuristic_features(self, input_data): + wp = pywt.WaveletPacket(data=input_data[None, :], wavelet=self.wavelet, + maxlevel=3, axis=1, + mode='smooth') + 
+ wpd_approximate_3 = wp['aaa'].data.sum() + wpd_approximate_2 = wp['aa'].data.sum() + wpd_approximate_1 = wp['a'].data.sum() + wpd_detail_3 = wp['ddd'].data.sum() + wpd_detail_2 = wp['dd'].data.sum() + wpd_detail_1 = wp['d'].data.sum() + return np.array([wpd_approximate_3, wpd_approximate_2, wpd_approximate_1]).squeeze(), \ + np.array([wpd_detail_3, wpd_detail_2, wpd_detail_1]).squeeze() + def _decompose_signal(self, input_data) -> Tuple[np.array, np.array]: - if self.wavelet in self.discrete_wavelets: - high_freq, low_freq = pywt.dwt(input_data, self.wavelet, 'smooth') + if self.return_feature_vector: + return self._compute_heuristic_features(input_data) else: - high_freq, low_freq = pywt.cwt(data=input_data, - scales=self.scales, - wavelet=self.wavelet) - low_freq = high_freq[-1, :] - high_freq = np.delete(high_freq, (-1), axis=0) - low_freq = low_freq[np.newaxis, :] - return high_freq, low_freq + if self.wavelet in self.discrete_wavelets: + high_freq, low_freq = pywt.dwt(input_data, self.wavelet, 'smooth') + + else: + high_freq, low_freq = pywt.cwt(data=input_data, + scales=self.scales, + wavelet=self.wavelet) + low_freq = high_freq[-1, :] + high_freq = np.delete(high_freq, (-1), axis=0) + low_freq = low_freq[np.newaxis, :] + return high_freq, low_freq def _decomposing_level(self) -> int: """The level of decomposition of the time series. 
@@ -53,6 +74,7 @@ def _decomposing_level(self) -> int: """ return pywt.dwt_max_level(len(self.time_series), self.wavelet) + @dask.delayed def _transform_one_sample(self, series: np.array): return self._get_basis(series) @@ -66,7 +88,8 @@ def threshold(Monoid): return ListMonad([Monoid[0][ basis = Either.insert(data).then(decompose).then(threshold).value[0] basis = np.concatenate(basis) - return basis + + return basis[-1, :] if self.use_low_freq else basis def _get_multidim_basis(self, data): def decompose(multidim_signal): diff --git a/fedot_ind/core/operation/transformation/data/park_transformation.py b/fedot_ind/core/operation/transformation/data/park_transformation.py new file mode 100644 index 000000000..69634b5d5 --- /dev/null +++ b/fedot_ind/core/operation/transformation/data/park_transformation.py @@ -0,0 +1,43 @@ +from typing import Union + +import numpy as np +from fedot.core.data.data import InputData + + +def _apply_park_transform(sample): + i_1_ch = 1 + i_2_ch = 2 + i_3_ch = 3 + v_1_ch = 4 + v_2_ch = 5 + v_3_ch = 6 + i_alpha = (2 * sample[:i_1_ch, :] - sample[i_1_ch:i_2_ch, :] - sample[i_2_ch:i_3_ch, :]) / 3 + i_beta = (sample[i_1_ch:i_2_ch, :] - sample[i_2_ch:i_3_ch, :]) / np.sqrt(3) + v_alpha = (2 * sample[i_3_ch:v_1_ch, :] - sample[v_1_ch:v_2_ch, :] - sample[v_2_ch:v_3_ch, :]) / 3 + v_beta = (sample[v_1_ch:v_2_ch, :] - sample[v_2_ch:v_3_ch, :]) / np.sqrt(3) + + # Calculate the instantaneous amplitude and phase of the current and voltage + instantaneous_i_amplitude = np.sqrt(i_alpha ** 2 + i_beta ** 2) + instantaneous_i_phase = np.arctan2(i_beta, i_alpha) + instantaneous_v_amplitude = np.sqrt(v_alpha ** 2 + v_beta ** 2) + instantaneous_v_phase = np.arctan2(v_beta, v_alpha) + return np.concatenate([i_alpha, i_beta, v_alpha, v_beta, instantaneous_i_amplitude, + instantaneous_i_phase, instantaneous_v_amplitude, instantaneous_v_phase]) + + +def park_transform(input_data: Union[InputData, np.ndarray]) -> np.ndarray: + """ + Applies the Park transform to a 
given DataFrame. + + The Park transform is a way to transform 3-phase electrical data into a 2-phase signal, which adds more information. + + Args: + data (pd.DataFrame): A DataFrame containing the 3-phase electrical data. + + Returns: + pd.DataFrame: The DataFrame with the added 2-phase electrical data. + """ + # Calculate the alpha and beta components of the current and voltage + features = input_data.features if isinstance(input_data, InputData) else input_data + feature_matrix = list(map(lambda x: _apply_park_transform(x), features)) + return np.stack(feature_matrix) diff --git a/fedot_ind/core/operation/transformation/data/point_cloud.py b/fedot_ind/core/operation/transformation/data/point_cloud.py index 6eef30cb9..e2222627f 100644 --- a/fedot_ind/core/operation/transformation/data/point_cloud.py +++ b/fedot_ind/core/operation/transformation/data/point_cloud.py @@ -1,4 +1,5 @@ import pandas as pd +from gtda.time_series import SingleTakensEmbedding from ripser import Rips, ripser from scipy import sparse @@ -74,7 +75,8 @@ def __compute_persistence_landscapes(ts): def time_series_to_point_cloud(self, input_data: np.array = None, - dimension_embed=2) -> np.array: + dimension_embed=3, + use_gtda=False) -> np.array: """Convert a time series into a point cloud in the dimension specified by dimension_embed. 
Args: @@ -91,11 +93,27 @@ def time_series_to_point_cloud(self, if self.__window_length is None: self.__window_length = dimension_embed - - trajectory_transformer = HankelMatrix(time_series=input_data, - window_size=self.__window_length, - strides=self.stride) - return trajectory_transformer.trajectory_matrix + if use_gtda: + pcd = self.gtda_time_series_to_pcd(input_data, dimension_embed) + else: + trajectory_transformer = HankelMatrix(time_series=input_data, + window_size=self.__window_length, + strides=self.stride) + pcd = trajectory_transformer.trajectory_matrix + return pcd + + def gtda_time_series_to_pcd(self, + input_data: np.array = None, + dimension_embed=3) -> np.array: + embedder_periodic = SingleTakensEmbedding( + parameters_type="fixed", + n_jobs=2, + time_delay=self.__window_length, + dimension=dimension_embed, + stride=self.stride, + ) + embedding = embedder_periodic.fit_transform(input_data) + return embedding def point_cloud_to_persistent_cohomology_ripser( self, point_cloud: np.array = None, max_simplex_dim: int = 1): @@ -174,4 +192,4 @@ def rolling_window(self, array, window): raise ValueError( "Window size cannot exceed the length of the array.") return np.array([array[i:i + window] - for i in range(len(array) - window + 1)]) + for i in range(len(array) - window + 1)]) diff --git a/fedot_ind/core/operation/transformation/representation/__init__.py b/fedot_ind/core/operation/transformation/representation/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/operation/transformation/representation/manifold/__init__.py b/fedot_ind/core/operation/transformation/representation/manifold/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/models/manifold/riemann_embeding.py b/fedot_ind/core/operation/transformation/representation/manifold/riemann_embeding.py similarity index 98% rename from fedot_ind/core/models/manifold/riemann_embeding.py rename to 
fedot_ind/core/operation/transformation/representation/manifold/riemann_embeding.py index 5fd918a7e..7e6487015 100644 --- a/fedot_ind/core/models/manifold/riemann_embeding.py +++ b/fedot_ind/core/operation/transformation/representation/manifold/riemann_embeding.py @@ -62,6 +62,9 @@ def __init__(self, params: Optional[OperationParameters] = None): 'tangent_space_metric': self.tangent_metric, 'SPD_space_metric': self.spd_metric}) + def __repr__(self): + return 'Riemann Manifold Class for TS representation' + def _init_spaces(self): self.spd_space = Covariances(estimator='scm') self.tangent_space = TangentSpace(metric=self.tangent_metric) diff --git a/fedot_ind/core/operation/transformation/representation/recurrence/__init__.py b/fedot_ind/core/operation/transformation/representation/recurrence/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/models/recurrence/reccurence_extractor.py b/fedot_ind/core/operation/transformation/representation/recurrence/reccurence_extractor.py similarity index 87% rename from fedot_ind/core/models/recurrence/reccurence_extractor.py rename to fedot_ind/core/operation/transformation/representation/recurrence/reccurence_extractor.py index f59b05a66..a93a5a03e 100644 --- a/fedot_ind/core/models/recurrence/reccurence_extractor.py +++ b/fedot_ind/core/operation/transformation/representation/recurrence/reccurence_extractor.py @@ -3,13 +3,12 @@ import numpy as np from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters -from fedot.core.repository.dataset_types import DataTypesEnum # from fedot_ind.core.metrics.metrics_implementation import * from fedot_ind.core.models.base_extractor import BaseExtractor -from fedot_ind.core.models.recurrence.sequences import RecurrenceFeatureExtractor from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix from fedot_ind.core.operation.transformation.data.kernel_matrix import TSTransformer 
+from fedot_ind.core.operation.transformation.representation.recurrence.sequences import RecurrenceFeatureExtractor class RecurrenceExtractor(BaseExtractor): @@ -51,6 +50,9 @@ def __init__(self, params: Optional[OperationParameters] = None): self.transformer = TSTransformer self.extractor = RecurrenceFeatureExtractor + def __repr__(self): + return 'Reccurence Class for TS representation' + def _generate_features_from_ts(self, ts: np.array): if self.window_size != 0: trajectory_transformer = HankelMatrix(time_series=ts, @@ -73,13 +75,13 @@ def _generate_features_from_ts(self, ts: np.array): features = specter.ts_to_3d_recurrence_matrix() col_names = {'feature_name': None} - predict = InputData(idx=np.arange(len(features)), - features=features, - target='no_target', - task='no_task', - data_type=DataTypesEnum.table, - supplementary_data=col_names) - return predict + # predict = InputData(idx=np.arange(len(features)), + # features=features, + # target='no_target', + # task='no_task', + # data_type=DataTypesEnum.table, + # supplementary_data=col_names) + return features def generate_recurrence_features(self, ts: np.array) -> InputData: diff --git a/fedot_ind/core/models/recurrence/sequences.py b/fedot_ind/core/operation/transformation/representation/recurrence/sequences.py similarity index 100% rename from fedot_ind/core/models/recurrence/sequences.py rename to fedot_ind/core/operation/transformation/representation/recurrence/sequences.py diff --git a/fedot_ind/core/operation/transformation/representation/statistical/__init__.py b/fedot_ind/core/operation/transformation/representation/statistical/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/models/quantile/quantile_extractor.py b/fedot_ind/core/operation/transformation/representation/statistical/quantile_extractor.py similarity index 58% rename from fedot_ind/core/models/quantile/quantile_extractor.py rename to 
fedot_ind/core/operation/transformation/representation/statistical/quantile_extractor.py index 17a44e0cd..2ef6b7af0 100644 --- a/fedot_ind/core/models/quantile/quantile_extractor.py +++ b/fedot_ind/core/operation/transformation/representation/statistical/quantile_extractor.py @@ -1,6 +1,6 @@ -from itertools import chain from typing import Optional +import dask from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters @@ -9,7 +9,7 @@ class QuantileExtractor(BaseExtractor): - """Class responsible for quantile feature generator experiment. + """Class responsible for statistical feature generator experiment. Attributes: window_size (int): size of window @@ -44,47 +44,32 @@ def __init__(self, params: Optional[OperationParameters] = None): self.logging_params.update({'Wsize': self.window_size, 'Stride': self.stride}) + def __repr__(self): + return 'Statistical Class for TS representation' + def _concatenate_global_and_local_feature( self, - global_features: InputData, - window_stat_features: InputData) -> InputData: - - if isinstance(window_stat_features.features[0], list): - window_stat_features.features = np.concatenate( - window_stat_features.features, axis=0) - window_stat_features.supplementary_data['feature_name'] = list( - chain(*window_stat_features.supplementary_data['feature_name'])) + global_features: np.ndarray, + window_stat_features: np.ndarray) -> np.ndarray: + if isinstance(window_stat_features[0], list): + window_stat_features = np.concatenate(window_stat_features, axis=0) - window_stat_features.features = np.concatenate( - [global_features.features, window_stat_features.features], axis=0) - window_stat_features.features = np.nan_to_num( - window_stat_features.features) - - window_stat_features.supplementary_data['feature_name'] = list( - chain(*[global_features.supplementary_data['feature_name'], - window_stat_features.supplementary_data['feature_name']])) + window_stat_features = 
np.concatenate([global_features, window_stat_features], axis=0) + window_stat_features = np.nan_to_num(window_stat_features) return window_stat_features def extract_stats_features(self, ts: np.array) -> InputData: - global_features = self.get_statistical_features( - ts, add_global_features=True) - if self.window_size != 0: - window_stat_features = self.apply_window_for_stat_feature( - ts_data=ts, - feature_generator=self.get_statistical_features, - window_size=self.window_size) - else: - window_stat_features = self.get_statistical_features(ts) + global_features = self.get_statistical_features(ts, add_global_features=True) + window_stat_features = self.get_statistical_features(ts) if self.window_size == 0 else \ + self.apply_window_for_stat_feature(ts_data=ts, feature_generator=self.get_statistical_features, + window_size=self.window_size) return self._concatenate_global_and_local_feature( global_features, window_stat_features) if self.add_global_features else window_stat_features + @dask.delayed def generate_features_from_ts(self, ts: np.array, window_length: int = None) -> InputData: - if len(ts.shape) == 1: - aggregation_df = self.extract_stats_features(ts) - else: - aggregation_df = self._get_feature_matrix( - self.extract_stats_features, ts) - - return aggregation_df + ts = ts[None, :] if len(ts.shape) == 1 else ts # sanity check for map method + statistical_representation = np.array(list(map(lambda channel: self.extract_stats_features(channel), ts))) + return statistical_representation diff --git a/fedot_ind/core/models/quantile/stat_features.py b/fedot_ind/core/operation/transformation/representation/statistical/stat_features.py similarity index 99% rename from fedot_ind/core/models/quantile/stat_features.py rename to fedot_ind/core/operation/transformation/representation/statistical/stat_features.py index b00e9d811..8e087da14 100644 --- a/fedot_ind/core/models/quantile/stat_features.py +++ 
b/fedot_ind/core/operation/transformation/representation/statistical/stat_features.py @@ -1,11 +1,12 @@ import warnings -from fedot_ind.core.architecture.settings.computational import backend_methods as np import pandas as pd from scipy.signal import find_peaks from scipy.stats import entropy, linregress from sklearn.preprocessing import MinMaxScaler +from fedot_ind.core.architecture.settings.computational import backend_methods as np + warnings.filterwarnings("ignore") @@ -34,7 +35,7 @@ def diff(array: np.array) -> float: return np.diff(array, n=len(array) - 1)[0] -# Extra methods for quantile features extraction +# Extra methods for statistical features extraction def skewness(array: np.array) -> float: if not isinstance(array, pd.Series): array = pd.Series(array) diff --git a/fedot_ind/core/operation/transformation/representation/tabular/__init__.py b/fedot_ind/core/operation/transformation/representation/tabular/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/models/tabular/tabular_extractor.py b/fedot_ind/core/operation/transformation/representation/tabular/tabular_extractor.py similarity index 71% rename from fedot_ind/core/models/tabular/tabular_extractor.py rename to fedot_ind/core/operation/transformation/representation/tabular/tabular_extractor.py index bb00f650f..c50f8bcad 100644 --- a/fedot_ind/core/models/tabular/tabular_extractor.py +++ b/fedot_ind/core/operation/transformation/representation/tabular/tabular_extractor.py @@ -3,16 +3,19 @@ import numpy as np from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters +from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from pymonad.either import Either from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from fedot_ind.core.models.base_extractor import BaseExtractor +from fedot_ind.core.operation.transformation.data.park_transformation import park_transform 
from fedot_ind.core.repository.constanst_repository import KERNEL_BASELINE_FEATURE_GENERATORS from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels class TabularExtractor(BaseExtractor): - """Class responsible for quantile feature generator experiment. + """Class responsible for statistical feature generator experiment. Attributes: window_size (int): size of window @@ -47,6 +50,7 @@ def __init__(self, params: Optional[OperationParameters] = None): self.reduce_dimension = params.get('reduce_dimension', True) self.repo = IndustrialModels().setup_repository() + self.custom_tabular_transformation = {'park_transformation': park_transform} self.pca_is_fitted = False self.scaler = StandardScaler() self.pca = PCA(self.explained_dispersion) @@ -58,6 +62,35 @@ def _reduce_dim(self, features, target): self.pca_is_fitted = True return self.pca.fit_transform(self.scaler.fit_transform(features, target)) + def _create_from_custom_fg(self, input_data): + for model_name, nodes in self.feature_domain.items(): + if model_name.__contains__('custom'): + transform_method = self.custom_tabular_transformation[nodes[0]] + ts_representation = transform_method(input_data) + else: + model = PipelineBuilder() + for node in nodes: + if isinstance(node, tuple): + model.add_node(operation_type=node[0], params=node[1]) + else: + model.add_node(operation_type=node) + model = model.build() + ts_representation = model.fit(input_data).predict + self.feature_list.append(ts_representation) + + def _create_from_default_fg(self, input_data): + feature_domain_models = [model for model in KERNEL_BASELINE_FEATURE_GENERATORS] + + if not self.feature_domain.__contains__('all'): + feature_domain_models = [model for model in feature_domain_models + if model.__contains__(self.feature_domain)] + + for model_name in feature_domain_models: + model = KERNEL_BASELINE_FEATURE_GENERATORS[model_name] + model.heads[0].parameters['use_sliding_window'] = self.use_sliding_window + model = 
model.build() + self.feature_list.append(model.fit(input_data).predict) + def create_feature_matrix(self, feature_list: list): return np.concatenate([x.reshape(x.shape[0], x.shape[1] * x.shape[2]) for x in feature_list], axis=1).squeeze() @@ -74,17 +107,10 @@ def _transform(self, input_data: InputData) -> np.array: def generate_features_from_ts(self, input_data: InputData, window_length: int = None) -> InputData: - feature_domain_models = [model for model in KERNEL_BASELINE_FEATURE_GENERATORS] + is_custom_feature_representation = isinstance(self.feature_domain, dict) self.feature_list = [] - - if not self.feature_domain.__contains__('all'): - feature_domain_models = [model for model in feature_domain_models - if model.__contains__(self.feature_domain)] - - for model_name in feature_domain_models: - model = KERNEL_BASELINE_FEATURE_GENERATORS[model_name] - model.heads[0].parameters['use_sliding_window'] = self.use_sliding_window - model = model.build() - self.feature_list.append(model.fit(input_data).predict) - + Either(value=input_data, + monoid=[input_data, + is_custom_feature_representation]).either(left_function=self._create_from_default_fg, + right_function=self._create_from_custom_fg) return self.feature_list diff --git a/fedot_ind/core/operation/transformation/representation/topological/__init__.py b/fedot_ind/core/operation/transformation/representation/topological/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/fedot_ind/core/models/topological/topofeatures.py b/fedot_ind/core/operation/transformation/representation/topological/topofeatures.py similarity index 100% rename from fedot_ind/core/models/topological/topofeatures.py rename to fedot_ind/core/operation/transformation/representation/topological/topofeatures.py diff --git a/fedot_ind/core/models/topological/topological_extractor.py b/fedot_ind/core/operation/transformation/representation/topological/topological_extractor.py similarity index 73% rename from 
fedot_ind/core/models/topological/topological_extractor.py rename to fedot_ind/core/operation/transformation/representation/topological/topological_extractor.py index ac512d428..3ef257ab8 100644 --- a/fedot_ind/core/models/topological/topological_extractor.py +++ b/fedot_ind/core/operation/transformation/representation/topological/topological_extractor.py @@ -1,19 +1,24 @@ import sys from functools import partial +from itertools import product +from typing import Optional +# import open3d as o3d import pandas as pd from fedot.core.data.data import InputData from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.dataset_types import DataTypesEnum +from gtda.homology import VietorisRipsPersistence from gtda.time_series import takens_embedding_optimal_parameters from scipy import stats +from scipy.spatial.distance import squareform, pdist from tqdm import tqdm -from typing import Optional from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.base_extractor import BaseExtractor -from fedot_ind.core.models.topological.topofeatures import PersistenceDiagramsExtractor, TopologicalFeaturesExtractor from fedot_ind.core.operation.transformation.data.point_cloud import TopologicalTransformation +from fedot_ind.core.operation.transformation.representation.topological.topofeatures import \ + PersistenceDiagramsExtractor, TopologicalFeaturesExtractor from fedot_ind.core.repository.constanst_repository import PERSISTENCE_DIAGRAM_EXTRACTOR, PERSISTENCE_DIAGRAM_FEATURES sys.setrecursionlimit(1000000000) @@ -53,6 +58,10 @@ def __init__(self, params: Optional[OperationParameters] = None): persistence_diagram_features=PERSISTENCE_DIAGRAM_FEATURES ) self.data_transformer = None + self.save_pcd = False + + def __repr__(self): + return 'Topological Class for TS representation' def __evaluate_persistence_params(self, ts_data: np.array): if self.feature_extractor is None: @@ -67,12 
+76,43 @@ def __evaluate_persistence_params(self, ts_data: np.array): persistence_diagram_extractor=persistence_diagram_extractor, persistence_diagram_features=PERSISTENCE_DIAGRAM_FEATURES) + def _generate_vr_mesh(self, pcd): + # Corresponding matrix of Euclidean pairwise distances + pairwise_distances = squareform(pdist(pcd)) + # Default parameter for ``metric`` is "euclidean" + vr_graph = VietorisRipsPersistence(metric="precomputed").fit_transform([pairwise_distances]) + return vr_graph + + def _generate_pcd(self, ts_data, persistence_params): + window_size_range = list(range(1, 35, 5)) + stride_range = list(range(1, 15, 3)) + list(product(window_size_range, stride_range)) + # for params in pcd_params: + # data_transformer = TopologicalTransformation(stride=params[1], persistence_params=persistence_params, + # window_length=round(ts_data.shape[0] * 0.01 * params[0])) + # point_cloud = data_transformer.time_series_to_point_cloud(input_data=ts_data, use_gtda=True) + # # VR_mesh = self._generate_vr_mesh(point_cloud) + # for scale in range(1, 15, 3): + # numpy2stl(point_cloud, + # f"./stl_scale_{scale}_ws_{params[0]}_stride_{params[1]}.stl", + # max_width=300., + # max_depth=200., + # max_height=300., + # scale=scale, + # min_thickness_percent=0.5, + # solid=False) + # pcd = o3d.geometry.PointCloud() + # pcd.points = o3d.utility.Vector3dVector(point_cloud) + # o3d.io.write_point_cloud(f"./pcd_ws_{params[0]}_stride_{params[1]}.ply", pcd) + def _generate_features_from_ts(self, ts_data: np.array, persistence_params: dict) -> InputData: + if self.save_pcd: + self._generate_pcd(ts_data, persistence_params) if self.data_transformer is None: self.data_transformer = TopologicalTransformation( persistence_params=persistence_params, window_length=round(ts_data.shape[0] * 0.01 * self.window_size)) - point_cloud = self.data_transformer.time_series_to_point_cloud(input_data=ts_data) + point_cloud = self.data_transformer.time_series_to_point_cloud(input_data=ts_data, 
use_gtda=True) topological_features = self.feature_extractor.transform(point_cloud) topological_features = InputData(idx=np.arange(len(topological_features.values)), features=topological_features.values, diff --git a/fedot_ind/core/optimizer/FedotEvoOptimizer.py b/fedot_ind/core/optimizer/FedotEvoOptimizer.py new file mode 100644 index 000000000..8a1b3b349 --- /dev/null +++ b/fedot_ind/core/optimizer/FedotEvoOptimizer.py @@ -0,0 +1,61 @@ +from typing import Sequence + +from golem.core.optimisers.adaptive.mab_agents.contextual_mab_agent import ContextualMultiArmedBanditAgent +from golem.core.optimisers.adaptive.mab_agents.mab_agent import MultiArmedBanditAgent +from golem.core.optimisers.adaptive.mab_agents.neural_contextual_mab_agent import NeuralContextualMultiArmedBanditAgent +from golem.core.optimisers.adaptive.operator_agent import RandomAgent +from golem.core.optimisers.genetic.gp_optimizer import EvoGraphOptimizer +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters +from golem.core.optimisers.graph import OptGraph +from golem.core.optimisers.objective import Objective +from golem.core.optimisers.optimization_parameters import GraphRequirements +from golem.core.optimisers.optimizer import GraphGenerationParams + +from fedot_ind.core.repository.constanst_repository import FEDOT_MUTATION_STRATEGY + + +class FedotEvoOptimizer(EvoGraphOptimizer): + def __init__(self, + objective: Objective, + initial_graphs: Sequence[OptGraph], + requirements: GraphRequirements, + graph_generation_params: GraphGenerationParams, + graph_optimizer_params: GPAlgorithmParameters, + optimisation_params: dict = None): + + graph_optimizer_params = self._exclude_resample_from_mutations(graph_optimizer_params) + self.mutation_agent_dict = {'random': RandomAgent, + 'bandit': MultiArmedBanditAgent, + 'contextual_bandit': ContextualMultiArmedBanditAgent, + 'neural_bandit': NeuralContextualMultiArmedBanditAgent} + if optimisation_params is not None: + 
graph_optimizer_params.adaptive_mutation_type = self._set_optimisation_strategy(graph_optimizer_params, + optimisation_params) + super().__init__(objective, initial_graphs, requirements, + graph_generation_params, graph_optimizer_params) + self.requirements = requirements + # self.eval_dispatcher = IndustrialDispatcher( + # adapter=graph_generation_params.adapter, + # n_jobs=requirements.n_jobs, + # graph_cleanup_fn=_try_unfit_graph, + # delegate_evaluator=graph_generation_params.remote_evaluator) + + def _set_optimisation_strategy(self, graph_optimizer_params, optimisation_params): + mutation_probs = FEDOT_MUTATION_STRATEGY[optimisation_params['mutation_strategy']] + mutation_agent = self.mutation_agent_dict[optimisation_params['mutation_agent']] + if optimisation_params['mutation_agent'].__contains__('random'): + mutation_agent = mutation_agent(actions=graph_optimizer_params.mutation_types, + probs=mutation_probs) + else: + mutation_agent = mutation_agent(actions=graph_optimizer_params.mutation_types) + return mutation_agent + + def _exclude_resample_from_mutations(self, graph_optimizer_params): + for mutation in graph_optimizer_params.mutation_types: + try: + is_invalid = mutation.__name__.__contains__('resample') + except Exception: + is_invalid = mutation.name.__contains__('resample') + if is_invalid: + graph_optimizer_params.mutation_types.remove(mutation) + return graph_optimizer_params diff --git a/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py b/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py index 49990cc4c..7e3aa6ca3 100644 --- a/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py +++ b/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py @@ -26,14 +26,7 @@ def __init__(self, graph_generation_params: GraphGenerationParams, graph_optimizer_params: GPAlgorithmParameters): - for mutation in graph_optimizer_params.mutation_types: - try: - is_invalid = mutation.__name__.__contains__('resample') - except Exception: - is_invalid = 
mutation.name.__contains__('resample') - if is_invalid: - graph_optimizer_params.mutation_types.remove(mutation) - + graph_optimizer_params = self._exclude_resample_from_mutations(graph_optimizer_params) graph_optimizer_params.adaptive_mutation_type = RandomAgent(actions=graph_optimizer_params.mutation_types, probs=FEDOT_MUTATION_STRATEGY[ 'params_mutation_strategy']) @@ -52,6 +45,16 @@ def _create_initial_population(self, initial_assumption): for graph in initial_assumption] return initial_individuals + def _exclude_resample_from_mutations(self, graph_optimizer_params): + for mutation in graph_optimizer_params.mutation_types: + try: + is_invalid = mutation.__name__.__contains__('resample') + except Exception: + is_invalid = mutation.name.__contains__('resample') + if is_invalid: + graph_optimizer_params.mutation_types.remove(mutation) + return graph_optimizer_params + def _initial_population(self, evaluator: EvaluationOperator): """ Initializes the initial population """ # Adding of initial assumptions to history as zero generation diff --git a/fedot_ind/core/repository/IndustrialDispatcher.py b/fedot_ind/core/repository/IndustrialDispatcher.py index 520d5347b..b6bc1cd46 100644 --- a/fedot_ind/core/repository/IndustrialDispatcher.py +++ b/fedot_ind/core/repository/IndustrialDispatcher.py @@ -4,6 +4,7 @@ from datetime import datetime from typing import Optional, Tuple +import dask from golem.core.log import Log from golem.core.optimisers.genetic.evaluation import MultiprocessingDispatcher from golem.core.optimisers.genetic.operators.operator import EvaluationOperator, PopulationT @@ -13,7 +14,7 @@ from golem.core.optimisers.timer import Timer from golem.utilities.memory import MemoryAnalytics from golem.utilities.utilities import determine_n_jobs -from joblib import wrap_non_picklable_objects, parallel_backend +from joblib import wrap_non_picklable_objects from pymonad.either import Either from pymonad.maybe import Maybe @@ -30,17 +31,14 @@ def dispatch(self, 
objective: ObjectiveFunction, return self.evaluate_with_cache def _multithread_eval(self, individuals_to_evaluate): - with parallel_backend(backend='dask', - n_jobs=self.n_jobs, - scatter=[individuals_to_evaluate] - ): - log = Log().get_parameters() - evaluation_results = list(map(lambda ind: - self.industrial_evaluate_single(self, - graph=ind.graph, - uid_of_individual=ind.uid, - logs_initializer=log), - individuals_to_evaluate)) + log = Log().get_parameters() + evaluation_results = list(map(lambda ind: + self.industrial_evaluate_single(self, + graph=ind.graph, + uid_of_individual=ind.uid, + logs_initializer=log), + individuals_to_evaluate)) + evaluation_results = dask.compute(*evaluation_results) return evaluation_results def _eval_at_least_one(self, individuals): @@ -80,7 +78,22 @@ def evaluate_population(self, individuals: PopulationT) -> PopulationT: logging_level=logging.INFO) return successful_evals - # @delayed + @dask.delayed + def eval_ind(self, graph, uid_of_individual): + adapted_evaluate = self._adapter.adapt_func(self._evaluate_graph) + start_time = timeit.default_timer() + fitness, graph = adapted_evaluate(graph) + end_time = timeit.default_timer() + eval_time_iso = datetime.now().isoformat() + eval_res = GraphEvalResult( + uid_of_individual=uid_of_individual, + fitness=fitness, + graph=graph, + metadata={ + 'computation_time_in_seconds': end_time - start_time, + 'evaluation_time_iso': eval_time_iso}) + return eval_res + @wrap_non_picklable_objects def industrial_evaluate_single(self, graph: OptGraph, @@ -100,17 +113,4 @@ def industrial_evaluate_single(self, # in case of multiprocessing run Log.setup_in_mp(*logs_initializer) - adapted_evaluate = self._adapter.adapt_func(self._evaluate_graph) - start_time = timeit.default_timer() - fitness, graph = adapted_evaluate(graph) - end_time = timeit.default_timer() - eval_time_iso = datetime.now().isoformat() - - eval_res = GraphEvalResult( - uid_of_individual=uid_of_individual, - fitness=fitness, - 
graph=graph, - metadata={ - 'computation_time_in_seconds': end_time - start_time, - 'evaluation_time_iso': eval_time_iso}) - return eval_res + return self.eval_ind(graph, uid_of_individual) diff --git a/fedot_ind/core/repository/constanst_repository.py b/fedot_ind/core/repository/constanst_repository.py index b691ee0fa..079937b8b 100644 --- a/fedot_ind/core/repository/constanst_repository.py +++ b/fedot_ind/core/repository/constanst_repository.py @@ -1,4 +1,5 @@ import math +import pathlib from enum import Enum from multiprocessing import cpu_count @@ -6,29 +7,45 @@ import pywt import spectrum from MKLpy.algorithms import FHeuristic, RMKL, MEMO, CKA, PWMK +from dask_ml.decomposition import TruncatedSVD as DaskSVD +from fedot.core.operations.evaluation.operation_implementations.models.boostings_implementations import \ + FedotCatBoostRegressionImplementation, FedotCatBoostClassificationImplementation from fedot.core.pipelines.pipeline_builder import PipelineBuilder from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.metrics_repository import ClassificationMetricsEnum, RegressionMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams from golem.core.tuning.optuna_tuner import OptunaTuner -from golem.core.tuning.simultaneous import SimultaneousTuner +from lightgbm.sklearn import LGBMClassifier, LGBMRegressor from scipy.spatial.distance import euclidean, cosine, cityblock, correlation, chebyshev, \ minkowski +from sklearn.ensemble import ExtraTreesRegressor, GradientBoostingClassifier, \ + RandomForestClassifier +from sklearn.linear_model import ( + Lasso as SklearnLassoReg, + LogisticRegression as SklearnLogReg, + Ridge as SklearnRidgeReg, + SGDRegressor as SklearnSGD +) +from sklearn.neural_network import MLPClassifier +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from torch import nn +from xgboost import XGBRegressor +from fedot_ind.api.utils.path_lib import 
PROJECT_PATH from fedot_ind.core.metrics.metrics_implementation import calculate_classification_metric, calculate_regression_metric, \ calculate_forecasting_metric, calculate_detection_metric from fedot_ind.core.models.nn.network_modules.losses import CenterLoss, CenterPlusLoss, ExpWeightedLoss, FocalLoss, \ HuberLoss, LogCoshLoss, MaskedLossWrapper, RMSELoss, SMAPELoss, TweedieLoss -from fedot_ind.core.models.quantile.stat_features import autocorrelation, ben_corr, crest_factor, energy, \ +from fedot_ind.core.models.ts_forecasting.eigen_autoreg import EigenAR +from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix +from fedot_ind.core.operation.transformation.representation.statistical.stat_features import autocorrelation, ben_corr, \ + crest_factor, energy, \ hjorth_complexity, hjorth_mobility, hurst_exponent, interquartile_range, kurtosis, mean_ema, mean_moving_median, \ mean_ptp_distance, n_peaks, pfd, ptp_amp, q25, q5, q75, q95, shannon_entropy, skewness, slope, zero_crossing_rate -from fedot_ind.core.models.topological.topofeatures import AverageHoleLifetimeFeature, \ +from fedot_ind.core.operation.transformation.representation.topological.topofeatures import AverageHoleLifetimeFeature, \ AveragePersistenceLandscapeFeature, BettiNumbersSumFeature, HolesNumberFeature, MaxHoleLifeTimeFeature, \ PersistenceDiagramsExtractor, PersistenceEntropyFeature, RadiusAtMaxBNFeature, RelevantHolesNumber, \ SimultaneousAliveHolesFeature, SumHoleLifetimeFeature -from fedot_ind.core.models.ts_forecasting.eigen_autoreg import EigenAR -from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix industrial_model_params_dict = dict(quantile_extractor={'window_size': 10, 'stride': 1, @@ -147,6 +164,22 @@ class DataTypeConstant(Enum): TRAJECTORY_MATRIX = HankelMatrix +class PathConstant(Enum): + IND_DATA_OPERATION_PATH = pathlib.Path(PROJECT_PATH, 'fedot_ind', 'core', 'repository', 'data', + 'industrial_data_operation_repository.json') + 
DEFAULT_DATA_OPERATION_PATH = pathlib.Path('data_operation_repository.json') + IND_MODEL_OPERATION_PATH = pathlib.Path(PROJECT_PATH, 'fedot_ind', 'core', 'repository', 'data', + 'industrial_model_repository.json') + DEFAULT_MODEL_OPERATION_PATH = pathlib.Path('model_repository.json') + + +class SolverConstant(Enum): + SOLVER_MODELS = {'np_svd_solver': np.linalg.svd, + 'np_qr_solver': np.linalg.qr, + 'dask_svd_solver': DaskSVD + } + + class FeatureConstant(Enum): STAT_METHODS = { 'mean_': np.mean, @@ -283,7 +316,7 @@ class FedotOperationConstant(Enum): 'table': DataTypesEnum.table} FEDOT_TUNER_STRATEGY = { 'optuna': OptunaTuner, - 'simultaneous': SimultaneousTuner, + # 'simultaneous': SimultaneousTuner, } FEDOT_HEAD_ENSEMBLE = {'regression': 'treg', 'classification': 'xgboost'} @@ -335,15 +368,47 @@ class FedotOperationConstant(Enum): 'classification': PipelineBuilder().add_node('logit'), 'regression': PipelineBuilder().add_node('treg') } - + # mutation order - [param_change,model_change,add_preproc_model,drop_model,add_model] FEDOT_MUTATION_STRATEGY = { - 'params_mutation_strategy': [0.4, 0.2, 0.2, 0.1, 0.1], + # 'params_mutation_strategy': [0.6, 0.25, 0.05, 0.05, 0.05], + 'params_mutation_strategy': [0.7, 0.3, 0.00, 0.00, 0.0], 'growth_mutation_strategy': [0.15, 0.15, 0.3, 0.1, 0.3], 'regularization_mutation_strategy': [0.2, 0.3, 0.1, 0.3, 0.1], } EXPLAINABLE_MODELS = ['recurrence_extractor', ] + SKLEARN_CLF_MODELS = { + # boosting models (bid datasets) + 'xgboost': GradientBoostingClassifier, + 'catboost': FedotCatBoostClassificationImplementation, + # solo linear models + 'logit': SklearnLogReg, + # solo tree models + 'dt': DecisionTreeClassifier, + # ensemble tree models + 'rf': RandomForestClassifier, + # solo nn models + 'mlp': MLPClassifier, + # external models + 'lgbm': LGBMClassifier, + } + + SKLEARN_REG_MODELS = { + # boosting models (bid datasets) + 'xgbreg': XGBRegressor, + 'sgdr': SklearnSGD, + # ensemble tree models (big datasets) + 'treg': 
ExtraTreesRegressor, + # solo linear models with regularization + 'ridge': SklearnRidgeReg, + 'lasso': SklearnLassoReg, + # solo tree models (small datasets) + 'dtreg': DecisionTreeRegressor, + # external models + 'lgbmreg': LGBMRegressor, + "catboostreg": FedotCatBoostRegressionImplementation + } class ModelCompressionConstant(Enum): @@ -752,6 +817,11 @@ class UnitTestConstant(Enum): KERNEL_BASELINE_NODE_LIST = KernelsConstant.KERNEL_BASELINE_NODE_LIST.value KERNEL_DISTANCE_METRIC = KernelsConstant.KERNEL_DISTANCE_METRIC.value +SOLVER_MODELS = SolverConstant.SOLVER_MODELS.value +DEFAULT_SVD_SOLVER = SOLVER_MODELS['np_svd_solver'] +DEFAULT_QR_SOLVER = SOLVER_MODELS['np_qr_solver'] +DASK_SVD_SOLVER = SOLVER_MODELS['dask_svd_solver'] + AVAILABLE_ANOMALY_DETECTION_OPERATIONS = FedotOperationConstant.AVAILABLE_ANOMALY_DETECTION_OPERATIONS.value AVAILABLE_REG_OPERATIONS = FedotOperationConstant.AVAILABLE_REG_OPERATIONS.value AVAILABLE_CLS_OPERATIONS = FedotOperationConstant.AVAILABLE_CLS_OPERATIONS.value @@ -769,17 +839,25 @@ class UnitTestConstant(Enum): FEDOT_DATA_TYPE = FedotOperationConstant.FEDOT_DATA_TYPE.value FEDOT_MUTATION_STRATEGY = FedotOperationConstant.FEDOT_MUTATION_STRATEGY.value EXPLAINABLE_MODELS = FedotOperationConstant.EXPLAINABLE_MODELS.value +SKLEARN_CLF_IMP = FedotOperationConstant.SKLEARN_CLF_MODELS.value +SKLEARN_REG_IMP = FedotOperationConstant.SKLEARN_REG_MODELS.value CPU_NUMBERS = ComputationalConstant.CPU_NUMBERS.value BATCH_SIZE_FOR_FEDOT_WORKER = ComputationalConstant.BATCH_SIZE_FOR_FEDOT_WORKER.value FEDOT_WORKER_NUM = ComputationalConstant.FEDOT_WORKER_NUM.value FEDOT_WORKER_TIMEOUT_PARTITION = ComputationalConstant.FEDOT_WORKER_TIMEOUT_PARTITION.value PATIENCE_FOR_EARLY_STOP = ComputationalConstant.PATIENCE_FOR_EARLY_STOP.value +DASK_CLIENT = None MULTI_ARRAY = DataTypeConstant.MULTI_ARRAY.value MATRIX = DataTypeConstant.MATRIX.value TRAJECTORY_MATRIX = DataTypeConstant.TRAJECTORY_MATRIX.value +IND_MODEL_OPERATION_PATH = 
PathConstant.IND_MODEL_OPERATION_PATH.value +IND_DATA_OPERATION_PATH = PathConstant.IND_DATA_OPERATION_PATH.value +DEFAULT_DATA_OPERATION_PATH = PathConstant.DEFAULT_DATA_OPERATION_PATH.value +DEFAULT_MODEL_OPERATION_PATH = PathConstant.DEFAULT_MODEL_OPERATION_PATH.value + ENERGY_THR = ModelCompressionConstant.ENERGY_THR.value DECOMPOSE_MODE = ModelCompressionConstant.DECOMPOSE_MODE.value FORWARD_MODE = ModelCompressionConstant.FORWARD_MODE.value diff --git a/fedot_ind/core/repository/dask_models.py b/fedot_ind/core/repository/dask_models.py new file mode 100644 index 000000000..2a8bf8996 --- /dev/null +++ b/fedot_ind/core/repository/dask_models.py @@ -0,0 +1,182 @@ +from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin +from sklearn.utils.validation import check_X_y, check_array +from dask_ml.linear_model import LogisticRegression, LinearRegression +from dask_ml.decomposition import PCA +import numpy as np +import dask.array as da + + +class DaskLogisticRegression(BaseEstimator, ClassifierMixin): + def __init__(self, params): + """ + Custom estimator based on Dask LogisticRegression. + """ + self.penalty = params.get('penalty', 'l2') + self.C = params.get('C', 1.0) + self.model_ = None # Placeholder for the internal Dask model + self.solver = 'admm' + + def fit(self, X, y): + """ + Fit the model using Dask's LogisticRegression. + """ + + X, y = check_X_y(X, y, accept_sparse=True, dtype=None) + self.classes_ = np.unique(y) + if not isinstance(X, da.Array): + X = da.from_array(X) + if not isinstance(y, da.Array): + y = da.from_array(y) + + self.model_ = LogisticRegression( + penalty=self.penalty, + C=self.C, + ) + self.model_.fit(X, y) + return self + + def predict(self, X): + """ + Predict class labels for samples in X. 
+ """ + X = check_array(X, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + return self.model_.predict(X).compute() + + def predict_proba(self, X): + """ + Predict probabilities for samples in X. + """ + X = check_array(X, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + return self.model_.predict_proba(X).compute() + + def score(self, X, y): + """ + Returns the accuracy of the model. + """ + y_pred = self.predict(X) + return np.mean(y_pred == y) + + def get_params(self, deep=True): + """ + Return hyperparameter dictionary for compatibility with GridSearchCV. + """ + return { + "penalty": self.penalty, + "C": self.C, + } + + def set_params(self, **params): + """ + Set hyperparameters. + """ + for key, value in params.items(): + setattr(self, key, value) + return self + + +class DaskRidgeRegression(BaseEstimator, ClassifierMixin): + def __init__(self, params): + self.C = params.get('alpha') + self.model_ = None # Placeholder for the internal Dask model + + def fit(self, X, y): + """ + Fit the model using Dask's LinearRegression. + """ + X, y = check_X_y(X, y, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + if not isinstance(y, da.Array): + y = da.from_array(y) + + self.model_ = LinearRegression(C=self.C) + self.model_.fit(X, y) + return self + + def predict(self, X): + """ + Predict class labels for samples in X. + """ + X = check_array(X, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + return self.model_.predict(X).compute() + + def score(self, X, y): + """ + Returns the accuracy of the model. + """ + y_pred = self.predict(X) + return np.mean(y_pred == y) + + def get_params(self, deep=True): + """ + Return hyperparameter dictionary for compatibility with GridSearchCV. + """ + return { + "alpha": self.C, + } + + def set_params(self, **params): + """ + Set hyperparameters. 
+ """ + for key, value in params.items(): + setattr(self, key, value) + return self + + +class DaskPCA(BaseEstimator, TransformerMixin): + def __init__(self, params): + self.n_components = params.get('n_components') + self.model_ = None + + def fit(self, X): + """ + Fit the model using Dask's PCA. + """ + X = check_array(X, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + + self.model_ = PCA(n_components=self.n_components) + self.model_.fit(X) + return self + + def transform(self, X): + """ + Transform the data using the fitted PCA model. + """ + X = check_array(X, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + return self.model_.transform(X) + + def get_params(self, deep=True): + """ + Return hyperparameter dictionary for compatibility with GridSearchCV. + """ + return { + "n_components": self.n_components, + } + + def set_params(self, **params): + """ + Set hyperparameters. + """ + for key, value in params.items(): + setattr(self, key, value) + return self + + def inverse_transform(self, X): + """ + Transform the data back to its original space. 
+ """ + X = check_array(X, accept_sparse=True, dtype=None) + if not isinstance(X, da.Array): + X = da.from_array(X) + return self.model_.inverse_transform(X) diff --git a/fedot_ind/core/repository/data/default_operation_params.json b/fedot_ind/core/repository/data/default_operation_params.json index 2ac985979..e9f932030 100644 --- a/fedot_ind/core/repository/data/default_operation_params.json +++ b/fedot_ind/core/repository/data/default_operation_params.json @@ -70,15 +70,18 @@ "logit": { "C": 1, "penalty": "l2", - "solver": "liblinear" + "solver": "lbfgs" }, "rf": { - "criterion":"gini", + "criterion": "gini", "max_features": 0.9, "min_samples_split": 5, "min_samples_leaf": 5, "bootstrap": false }, + "pdl_clf": { + "model": "rf" + }, "ridge": { "alpha": 1.0 }, @@ -124,6 +127,9 @@ "min_samples_leaf": 10, "bootstrap": false }, + "pdl_reg": { + "model": "treg" + }, "dt": { "max_depth": 5, "min_samples_split": 10, diff --git a/fedot_ind/core/repository/data/industrial_data_operation_repository.json b/fedot_ind/core/repository/data/industrial_data_operation_repository.json index 2e41f1cf1..0bfef9ecc 100644 --- a/fedot_ind/core/repository/data/industrial_data_operation_repository.json +++ b/fedot_ind/core/repository/data/industrial_data_operation_repository.json @@ -220,13 +220,14 @@ ] }, "topological_extractor": { - "meta": "custom_preprocessing", + "meta": "industrial_preprocessing", "presets": [ "fast_train" ], "tags": [ "extractor" - ]}, + ] + }, "minirocket_extractor": { "meta": "industrial_preprocessing", "presets": [ diff --git a/fedot_ind/core/repository/data/industrial_model_repository.json b/fedot_ind/core/repository/data/industrial_model_repository.json index c4a87aefb..db6ca6342 100644 --- a/fedot_ind/core/repository/data/industrial_model_repository.json +++ b/fedot_ind/core/repository/data/industrial_model_repository.json @@ -613,17 +613,43 @@ }, "rf": { "meta": "sklearn_class", - "presets": ["fast_train", "*tree"], - "tags": ["tree", "non_linear"] + 
"presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] + }, + "pdl_clf": { + "meta": "sklearn_class", + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] }, "rfr": { "meta": "sklearn_regr", - "presets": ["fast_train", "*tree"], - "tags": ["tree", "non_linear"] + "presets": [ + "fast_train", + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] }, "ridge": { "meta": "sklearn_regr", - "presets": ["fast_train", "ts"], + "presets": [ + "fast_train", + "ts" + ], "tags": [ "simple", "linear", @@ -723,7 +749,19 @@ }, "treg": { "meta": "sklearn_regr", - "presets": ["*tree"], + "presets": [ + "*tree" + ], + "tags": [ + "tree", + "non_linear" + ] + }, + "pdl_reg": { + "meta": "sklearn_regr", + "presets": [ + "*tree" + ], "tags": [ "tree", "non_linear" @@ -731,9 +769,14 @@ }, "xgboost": { "meta": "sklearn_class", - "presets": ["*tree"], + "presets": [ + "*tree" + ], "tags": [ - "boosting", "tree", "non-default", "non_linear" + "boosting", + "tree", + "non-default", + "non_linear" ] }, "xgbreg": { diff --git a/fedot_ind/core/repository/excluded.py b/fedot_ind/core/repository/excluded.py index 188d996b3..063e446de 100644 --- a/fedot_ind/core/repository/excluded.py +++ b/fedot_ind/core/repository/excluded.py @@ -15,13 +15,13 @@ ) from sklearn.naive_bayes import BernoulliNB as SklearnBernoulliNB, MultinomialNB as SklearnMultinomialNB -from fedot_ind.core.models.manifold.riemann_embeding import RiemannExtractor from fedot_ind.core.models.nn.network_impl.dummy_nn import DummyOverComplicatedNeuralNetwork from fedot_ind.core.models.nn.network_impl.explainable_convolution_model import XCModel from fedot_ind.core.models.nn.network_impl.lora_nn import LoraModel from fedot_ind.core.models.nn.network_impl.tst import TSTModel from fedot_ind.core.operation.dummy.dummy_operation import DummyOperation from fedot_ind.core.operation.filtration.feature_filtration import FeatureFilter +from 
fedot_ind.core.operation.transformation.representation.manifold.riemann_embeding import RiemannExtractor EXCLUDED_OPERATION_MUTATION = { 'regression': ['recurrence_extractor', diff --git a/fedot_ind/core/repository/industrial_implementations/abstract.py b/fedot_ind/core/repository/industrial_implementations/abstract.py index a9d45083d..bf047fa5f 100644 --- a/fedot_ind/core/repository/industrial_implementations/abstract.py +++ b/fedot_ind/core/repository/industrial_implementations/abstract.py @@ -217,17 +217,12 @@ def _create_tuner(tuning_params, tuning_data): replace_default_search_space=True) pipeline_tuner = TunerBuilder( train_data.task).with_search_space(search_space).with_tuner( - tuning_params['tuner']).with_n_jobs(1).with_metric( + tuning_params['tuner']).with_n_jobs(-1).with_metric( tuning_params['metric']).with_timeout( - tuning_params.get( - 'tuning_timeout', - 15)).with_early_stopping_rounds( - tuning_params.get( - 'tuning_early_stop', - 50)).with_iterations( - tuning_params.get( - 'tuning_iterations', - 150)).build(tuning_data) + tuning_params.get('tuning_timeout', 15.0)).build(tuning_data) + # with_iterations(tuning_params.get('tuning_iterations',150)).\ + # with_early_stopping_rounds(tuning_params.get('tuning_early_stop', 50)) + return pipeline_tuner if isinstance(model_to_tune, dict): @@ -502,6 +497,8 @@ def predict_operation_industrial( trained_operation=fitted_operation, predict_data=data, output_mode=output_mode) + is_numpy_predict = isinstance(prediction.predict, np.ndarray) + prediction.predict = prediction.predict.detach().numpy() if not is_numpy_predict else prediction.predict prediction = self.assign_tabular_column_types(prediction, output_mode) # any inplace operations here are dangerous! 
diff --git a/fedot_ind/core/repository/industrial_implementations/ml_optimisation.py b/fedot_ind/core/repository/industrial_implementations/ml_optimisation.py new file mode 100644 index 000000000..f53343ec8 --- /dev/null +++ b/fedot_ind/core/repository/industrial_implementations/ml_optimisation.py @@ -0,0 +1,170 @@ +import datetime +from copy import deepcopy +from datetime import timedelta +from functools import partial +from typing import Optional, Tuple, Union, Sequence + +import optuna +from dask.distributed import wait +from fedot.core.constants import DEFAULT_TUNING_ITERATIONS_NUMBER +from fedot.core.data.data import InputData +from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder +from golem.core.adapter import BaseOptimizationAdapter +from golem.core.optimisers.graph import OptGraph +from golem.core.optimisers.objective import ObjectiveFunction +from golem.core.tuning.search_space import SearchSpace, get_node_operation_parameter_label +from golem.core.tuning.tuner_interface import BaseTuner, DomainGraphForTune +from optuna import Trial, Study +from optuna.trial import FrozenTrial + + +class DaskOptunaTuner(BaseTuner): + def __init__(self, objective_evaluate: ObjectiveFunction, + search_space: SearchSpace, + adapter: Optional[BaseOptimizationAdapter] = None, + iterations: int = 100, + early_stopping_rounds: Optional[int] = None, + timeout: timedelta = timedelta(minutes=5), + n_jobs: int = -1, + deviation: float = 0.05, + objectives_number: int = 1): + super().__init__(objective_evaluate, + search_space, + adapter, + iterations, + early_stopping_rounds, + timeout, + n_jobs, + deviation) + self.objectives_number = objectives_number + self.study = None + self.iterations = 100 + self.n_trials = 10 + + def _dask_backend_tune(self, predefined_objective, show_progress): + self.storage = optuna.integration.DaskStorage() + # self.storage = optuna.integration.dask.DaskStorage() + self.study = 
optuna.create_study(storage=self.storage, + direction='minimize') # ['minimize'] * self.objectives_number + # Submit self.n_trials different optimization tasks, where each task runs self.iterations optimization trials + from fedot_ind.core.repository.constanst_repository import DASK_CLIENT + client = DASK_CLIENT + futures = [client.submit(self.study.optimize, + predefined_objective, + n_trials=self.iterations, + n_jobs=self.n_jobs, + timeout=self.timeout.seconds, + callbacks=[self.early_stopping_callback], + show_progress_bar=show_progress) for _ in range(self.n_trials)] + wait(futures) + print(f"Best params: {self.study.best_params}") + + def tune(self, graph: DomainGraphForTune, show_progress: bool = True) -> \ + Union[DomainGraphForTune, Sequence[DomainGraphForTune]]: + graph = self.adapter.adapt(graph) + predefined_objective = partial(self.objective, graph=graph) + is_multi_objective = self.objectives_number > 1 + self.init_check(graph) + init_parameters, has_parameters_to_optimize = self._get_initial_point(graph) + + if not has_parameters_to_optimize: + self._stop_tuning_with_message(f'Graph {graph.graph_description} has no parameters to optimize') + tuned_graphs = self.init_graph + else: + # Enqueue initial point to try + verbosity_level = optuna.logging.INFO if show_progress else optuna.logging.WARNING + optuna.logging.set_verbosity(verbosity_level) + self._dask_backend_tune(predefined_objective, show_progress) + tuned_graphs = self.set_arg_graph(graph, self.study.best_trials[0].params) if not is_multi_objective else \ + [self.set_arg_graph(deepcopy(graph), best_trial.params) for best_trial in self.study.best_trials] + self.was_tuned = True + + final_graphs = self.final_check(tuned_graphs, is_multi_objective) + final_graphs = self.adapter.restore(final_graphs) + return final_graphs + + def objective(self, trial: Trial, graph: OptGraph) -> Union[float, Sequence[float,]]: + new_parameters = self._get_parameters_from_trial(graph, trial) + new_graph = 
BaseTuner.set_arg_graph(graph, new_parameters) + metric_value = self.get_metric_value(new_graph) + return metric_value + + def _get_parameters_from_trial(self, graph: OptGraph, trial: Trial) -> dict: + new_parameters = {} + for node_id, node in enumerate(graph.nodes): + operation_name = node.name + + # Get available parameters for operation + tunable_node_params = self.search_space.parameters_per_operation.get(operation_name, {}) + + for parameter_name, parameter_properties in tunable_node_params.items(): + node_op_parameter_name = get_node_operation_parameter_label(node_id, operation_name, parameter_name) + + parameter_type = parameter_properties.get('type') + sampling_scope = parameter_properties.get('sampling-scope') + if parameter_type == 'discrete': + new_parameters.update({node_op_parameter_name: + trial.suggest_int(node_op_parameter_name, *sampling_scope)}) + elif parameter_type == 'continuous': + new_parameters.update({node_op_parameter_name: + trial.suggest_float(node_op_parameter_name, *sampling_scope)}) + elif parameter_type == 'categorical': + new_parameters.update({node_op_parameter_name: + trial.suggest_categorical(node_op_parameter_name, *sampling_scope)}) + return new_parameters + + def _get_initial_point(self, graph: OptGraph) -> Tuple[dict, bool]: + initial_parameters = {} + has_parameters_to_optimize = False + for node_id, node in enumerate(graph.nodes): + operation_name = node.name + + # Get available parameters for operation + tunable_node_params = self.search_space.parameters_per_operation.get(operation_name) + + if tunable_node_params: + has_parameters_to_optimize = True + tunable_initial_params = {get_node_operation_parameter_label(node_id, operation_name, p): + node.parameters[p] for p in node.parameters if p in tunable_node_params} + if tunable_initial_params: + initial_parameters.update(tunable_initial_params) + return initial_parameters, has_parameters_to_optimize + + def early_stopping_callback(self, study: Study, trial: FrozenTrial): + 
if self.early_stopping_rounds is not None: + current_trial_number = trial.number + best_trial_number = study.best_trial.number + should_stop = (current_trial_number - best_trial_number) >= self.early_stopping_rounds + if should_stop: + self.log.debug('Early stopping rounds criteria was reached') + study.stop() + + +def tune_pipeline_industrial(self, train_data: InputData, pipeline_gp_composed: Pipeline) -> Pipeline: + """ Launch tuning procedure for obtained pipeline by composer """ + timeout_for_tuning = abs(self.timer.determine_resources_for_tuning()) / 60 + tuner = (TunerBuilder(self.params.task) + .with_tuner(DaskOptunaTuner) + .with_metric(self.metrics[0]) + .with_iterations(DEFAULT_TUNING_ITERATIONS_NUMBER) + .with_timeout(datetime.timedelta(minutes=timeout_for_tuning)) + .with_eval_time_constraint(self.params.composer_requirements.max_graph_fit_time) + .with_requirements(self.params.composer_requirements) + .build(train_data)) + + if self.timer.have_time_for_tuning(): + # Tune all nodes in the pipeline + with self.timer.launch_tuning(): + self.was_tuned = False + self.log.message(f'Hyperparameters tuning started with {round(timeout_for_tuning)} min. 
timeout') + tuned_pipeline = tuner.tune(pipeline_gp_composed) + self.log.message('Hyperparameters tuning finished') + else: + self.log.message(f'Time for pipeline composing was {str(self.timer.composing_spend_time)}.\n' + f'The remaining {max(0, round(timeout_for_tuning, 1))} seconds are not enough ' + f'to tune the hyperparameters.') + self.log.message('Composed pipeline returned without tuning.') + tuned_pipeline = pipeline_gp_composed + self.was_tuned = tuner.was_tuned + return tuned_pipeline diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py index 172cb5d27..3582b436d 100644 --- a/fedot_ind/core/repository/initializer_industrial_models.py +++ b/fedot_ind/core/repository/initializer_industrial_models.py @@ -1,6 +1,5 @@ -import pathlib - import fedot.core.data.data_split as fedot_data_split +import golem.core.tuning.optuna_tuner as OptunaImpl from fedot.api.api_utils.api_composer import ApiComposer from fedot.api.api_utils.api_params_repository import ApiParamsRepository from fedot.core.data.merge.data_merger import ImageDataMerger, TSDataMerger @@ -10,20 +9,28 @@ LaggedImplementation, TsSmoothingImplementation from fedot.core.operations.operation import Operation from fedot.core.optimisers.objective.data_source_splitter import DataSourceSplitter +from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.verification import class_rules +from fedot.core.pipelines.verification import common_rules from fedot.core.repository.operation_types_repository import OperationTypesRepository -from fedot_ind.api.utils.path_lib import PROJECT_PATH +import fedot_ind.core.repository.model_repository as MODEL_REPO +from fedot_ind.core.repository.constanst_repository import IND_DATA_OPERATION_PATH, IND_MODEL_OPERATION_PATH, \ + DEFAULT_DATA_OPERATION_PATH, DEFAULT_MODEL_OPERATION_PATH from 
fedot_ind.core.repository.industrial_implementations.abstract import preprocess_industrial_predicts, \ transform_lagged_for_fit_industrial, transform_smoothing_industrial, transform_lagged_industrial, \ merge_industrial_predicts, merge_industrial_targets, build_industrial, postprocess_industrial_predicts, \ split_any_industrial, split_time_series_industrial, predict_operation_industrial, predict_industrial, \ predict_for_fit_industrial, update_column_types_industrial, _check_and_correct_window_size_industrial, \ fit_topo_extractor_industrial, transform_topo_extractor_industrial +from fedot_ind.core.repository.industrial_implementations.ml_optimisation import DaskOptunaTuner, \ + tune_pipeline_industrial from fedot_ind.core.repository.industrial_implementations.optimisation import _get_default_industrial_mutations from fedot_ind.core.repository.industrial_implementations.optimisation import \ has_no_data_flow_conflicts_in_industrial_pipeline +from fedot_ind.core.repository.model_repository import SKLEARN_REG_MODELS, SKLEARN_CLF_MODELS, FEDOT_PREPROC_MODEL +from fedot_ind.core.repository.model_repository import overload_model_implementation from fedot_ind.core.tuning.search_space import get_industrial_search_space FEDOT_METHOD_TO_REPLACE = [(PipelineSearchSpace, "get_parameters_dict"), @@ -45,7 +52,9 @@ (TopologicalFeaturesImplementation, 'transform'), (LaggedImplementation, 'transform_for_fit'), (LaggedImplementation, '_check_and_correct_window_size'), - (TsSmoothingImplementation, 'transform')] + (TsSmoothingImplementation, 'transform'), + (OptunaImpl, 'OptunaTuner'), + (ApiComposer, 'tune_final_pipeline')] INDUSTRIAL_REPLACE_METHODS = [get_industrial_search_space, _get_default_industrial_mutations, preprocess_industrial_predicts, @@ -65,44 +74,50 @@ transform_topo_extractor_industrial, transform_lagged_for_fit_industrial, _check_and_correct_window_size_industrial, - transform_smoothing_industrial] + transform_smoothing_industrial, + DaskOptunaTuner, + 
tune_pipeline_industrial] + DEFAULT_METHODS = [getattr(class_impl[0], class_impl[1]) for class_impl in FEDOT_METHOD_TO_REPLACE] +DEFAULT_MODELS_TO_REPLACE = [(MODEL_REPO, 'SKLEARN_REG_MODELS'), + (MODEL_REPO, 'SKLEARN_CLF_MODELS'), + (MODEL_REPO, 'FEDOT_PREPROC_MODEL')] + + +def has_no_resample(pipeline: Pipeline): + """ + Pipeline can have only one resample operation located in start of the pipeline + + :param pipeline: pipeline for checking + """ + for node in pipeline.nodes: + if node.name == 'resample': + raise ValueError( + f'Pipeline can not have resample operation') + return True class IndustrialModels: def __init__(self): - self.industrial_data_operation_path = pathlib.Path( - PROJECT_PATH, - 'fedot_ind', - 'core', - 'repository', - 'data', - 'industrial_data_operation_repository.json') - - self.base_data_operation_path = pathlib.Path( - 'data_operation_repository.json') - - self.industrial_model_path = pathlib.Path( - PROJECT_PATH, - 'fedot_ind', - 'core', - 'repository', - 'data', - 'industrial_model_repository.json') - - self.base_model_path = pathlib.Path('model_repository.json') - - def _replace_operation(self, to_industrial=True): - if to_industrial: - method = INDUSTRIAL_REPLACE_METHODS - else: - method = DEFAULT_METHODS + self.industrial_data_operation_path = IND_DATA_OPERATION_PATH + self.industrial_model_path = IND_MODEL_OPERATION_PATH + + self.base_data_operation_path = DEFAULT_DATA_OPERATION_PATH + self.base_model_path = DEFAULT_MODEL_OPERATION_PATH + + def _replace_operation(self, to_industrial=True, backend: str = 'default'): + method = INDUSTRIAL_REPLACE_METHODS if to_industrial else DEFAULT_METHODS for class_impl, method_to_replace in zip(FEDOT_METHOD_TO_REPLACE, method): setattr(class_impl[0], class_impl[1], method_to_replace) + if backend.__contains__('dask'): + model_to_overload = [SKLEARN_REG_MODELS, SKLEARN_CLF_MODELS, FEDOT_PREPROC_MODEL] + overloaded_model = overload_model_implementation(model_to_overload, backend=backend) + for 
model_impl, new_backend_impl in zip(DEFAULT_MODELS_TO_REPLACE, overloaded_model): + setattr(model_impl[0], model_impl[1], new_backend_impl) - def setup_repository(self): + def setup_repository(self, backend: str = 'default'): OperationTypesRepository.__repository_dict__.update( {'data_operation': {'file': self.industrial_data_operation_path, 'initialized_repo': True, @@ -118,12 +133,12 @@ def setup_repository(self): OperationTypesRepository.assign_repo( 'model', self.industrial_model_path) # replace mutations - self._replace_operation(to_industrial=True) + self._replace_operation(to_industrial=True, backend=backend) class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) return OperationTypesRepository - def setup_default_repository(self): + def setup_default_repository(self, backend: str = 'default'): """ Switching to fedot models. """ @@ -140,7 +155,8 @@ def setup_default_repository(self): 'initialized_repo': None, 'default_tags': []}}) OperationTypesRepository.assign_repo('model', self.base_model_path) - self._replace_operation(to_industrial=False) + self._replace_operation(to_industrial=False, backend=backend) + common_rules.append(has_no_resample) return OperationTypesRepository def __enter__(self): diff --git a/fedot_ind/core/repository/model_repository.py b/fedot_ind/core/repository/model_repository.py index 4621b0e2a..e26debd5b 100644 --- a/fedot_ind/core/repository/model_repository.py +++ b/fedot_ind/core/repository/model_repository.py @@ -1,6 +1,7 @@ from enum import Enum from itertools import chain +from dask_ml.decomposition import PCA as DaskKernelPCA from fedot.core.operations.evaluation.operation_implementations.data_operations.decompose import \ DecomposerClassImplementation from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_filters import \ @@ -8,8 +9,6 @@ from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_imbalanced_class import \ ResampleImplementation from 
fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import * -from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.fast_topological_extractor import \ - TopologicalFeaturesImplementation from fedot.core.operations.evaluation.operation_implementations.data_operations.ts_transformations import \ ExogDataTransformationImplementation, GaussianFilterImplementation, LaggedTransformationImplementation, \ SparseLaggedTransformationImplementation, TsSmoothingImplementation @@ -45,7 +44,6 @@ from fedot_ind.core.models.detection.custom.stat_detector import StatisticalDetector from fedot_ind.core.models.detection.probalistic.kalman import UnscentedKalmanFilter from fedot_ind.core.models.detection.subspaces.sst import SingularSpectrumTransformation -from fedot_ind.core.models.manifold.riemann_embeding import RiemannExtractor from fedot_ind.core.models.nn.network_impl.deep_tcn import TCNModel from fedot_ind.core.models.nn.network_impl.deepar import DeepAR from fedot_ind.core.models.nn.network_impl.dummy_nn import DummyOverComplicatedNeuralNetwork @@ -56,13 +54,18 @@ from fedot_ind.core.models.nn.network_impl.nbeats import NBeatsModel from fedot_ind.core.models.nn.network_impl.resnet import ResNetModel from fedot_ind.core.models.nn.network_impl.tst import TSTModel -from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor -from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor +from fedot_ind.core.models.pdl.pairwise_model import PairwiseDifferenceClassifier, PairwiseDifferenceRegressor from fedot_ind.core.models.ts_forecasting.glm import GLMIndustrial from fedot_ind.core.operation.filtration.channel_filtration import ChannelCentroidFilter from fedot_ind.core.operation.transformation.basis.eigen_basis import EigenBasisImplementation from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation from 
fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation +from fedot_ind.core.operation.transformation.representation.manifold.riemann_embeding import RiemannExtractor +from fedot_ind.core.operation.transformation.representation.recurrence.reccurence_extractor import RecurrenceExtractor +from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor +from fedot_ind.core.operation.transformation.representation.topological.topological_extractor import \ + TopologicalExtractor +from fedot_ind.core.repository.dask_models import DaskLogisticRegression, DaskRidgeRegression from fedot_ind.core.repository.excluded import EXCLUDED_OPERATION_MUTATION, TEMPORARY_EXCLUDED @@ -89,7 +92,9 @@ class AtomizedModel(Enum): # external models 'lgbm': LGBMClassifier, # for detection - 'one_class_svm': OneClassSVM + 'one_class_svm': OneClassSVM, + # pairwise model + 'pdl_clf': PairwiseDifferenceClassifier } FEDOT_PREPROC_MODEL = { # data standartization @@ -99,8 +104,9 @@ class AtomizedModel(Enum): 'simple_imputation': ImputationImplementation, # dimension reduction 'kernel_pca': KernelPCAImplementation, + 'pca': PCAImplementation, # feature generation - 'topological_extractor': TopologicalFeaturesImplementation + # 'topological_extractor': TopologicalFeaturesImplementation } INDUSTRIAL_PREPROC_MODEL = { # data filtration @@ -114,7 +120,8 @@ class AtomizedModel(Enum): 'quantile_extractor': QuantileExtractor, 'riemann_extractor': RiemannExtractor, # feature generation - 'topological_extractor': TopologicalFeaturesImplementation, + # 'topological_extractor': TopologicalFeaturesImplementation, + 'topological_extractor': TopologicalExtractor, # nn feature extraction algorithm 'minirocket_extractor': MiniRocketExtractor, # 'chronos_extractor': ChronosExtractor, @@ -136,7 +143,9 @@ class AtomizedModel(Enum): 'dtreg': DecisionTreeRegressor, # external models 'lgbmreg': LGBMRegressor, - "catboostreg": 
FedotCatBoostRegressionImplementation + "catboostreg": FedotCatBoostRegressionImplementation, + # pairwise model + 'pdl_reg': PairwiseDifferenceRegressor } FORECASTING_MODELS = { @@ -194,6 +203,14 @@ class AtomizedModel(Enum): 'lora_model': LoraModel } + # DASK_MODELS = {'logit': DaskLogReg, + DASK_MODELS = {'logit': DaskLogisticRegression, + 'kernel_pca': DaskKernelPCA, + # 'kernel_pca': DaskKernelPCA, + # 'ridge': DaskLinReg + 'ridge': DaskRidgeRegression + } + def default_industrial_availiable_operation(problem: str = 'regression'): operation_dict = {'regression': SKLEARN_REG_MODELS.keys(), @@ -232,12 +249,28 @@ def default_industrial_availiable_operation(problem: str = 'regression'): return operations +def overload_model_implementation(list_of_model, backend: str = 'default'): + overload_list = [] + for model_dict in list_of_model: + for model_impl in model_dict.keys(): + if model_impl in DASK_MODELS.keys() and backend.__contains__('dask'): + model_dict[model_impl] = DASK_MODELS[model_impl] + overload_list.append(model_dict) + return overload_list + + +MODELS_WITH_DASK_ALTERNATIVE = [ + AtomizedModel.FEDOT_PREPROC_MODEL.value, + AtomizedModel.SKLEARN_CLF_MODELS.value, + AtomizedModel.SKLEARN_REG_MODELS.value +] +DASK_MODELS = AtomizedModel.DASK_MODELS.value +SKLEARN_REG_MODELS = AtomizedModel.SKLEARN_REG_MODELS.value +SKLEARN_CLF_MODELS = AtomizedModel.SKLEARN_CLF_MODELS.value +FEDOT_PREPROC_MODEL = AtomizedModel.FEDOT_PREPROC_MODEL.value INDUSTRIAL_PREPROC_MODEL = AtomizedModel.INDUSTRIAL_PREPROC_MODEL.value INDUSTRIAL_CLF_PREPROC_MODEL = AtomizedModel.INDUSTRIAL_CLF_PREPROC_MODEL.value -FEDOT_PREPROC_MODEL = AtomizedModel.FEDOT_PREPROC_MODEL.value -SKLEARN_CLF_MODELS = AtomizedModel.SKLEARN_CLF_MODELS.value ANOMALY_DETECTION_MODELS = AtomizedModel.ANOMALY_DETECTION_MODELS.value -SKLEARN_REG_MODELS = AtomizedModel.SKLEARN_REG_MODELS.value NEURAL_MODEL = AtomizedModel.NEURAL_MODEL.value FORECASTING_MODELS = AtomizedModel.FORECASTING_MODELS.value 
FORECASTING_PREPROC = AtomizedModel.FORECASTING_PREPROC.value diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index 9d1f6c858..dc8b7755d 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -17,7 +17,8 @@ 'wavelet_basis': {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]}, 'wavelet': {'hyperopt-dist': hp.choice, - 'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}}, + 'sampling-scope': [['mexh', 'morl', 'gaus1', 'gaus8', 'gaus5']]}, + 'low_freq': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}, 'fourier_basis': {'threshold': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(0.75, 0.99, 0.05))]}, 'low_rank': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 30, 3)]]}, @@ -123,643 +124,650 @@ {'anomaly_thr': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(0.75, 0.99, 0.05))]}, 'window_length': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(10, 35, 5))]}}, + 'pdl_clf': {}, + 'pdl_reg': {} } +default_fedot_operation_params = { + 'kmeans': { + 'n_clusters': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 7], + 'type': 'discrete'}}, + 'adareg': { + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["linear", "square", "exponential"]], + 'type': 'categorical'}}, + 'gbr': { + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["ls", "lad", "huber", "quantile"]], + 'type': 'categorical'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 
'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.75, 0.99], + 'type': 'continuous'}}, + 'logit': { + 'C': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-2, 10.0], + 'type': 'continuous'}, -def get_industrial_search_space(self): - parameters_per_operation = { - 'kmeans': { - 'n_clusters': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 7], - 'type': 'discrete'}}, - 'adareg': { - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-3, 1], - 'type': 'continuous'}, - 'loss': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["linear", "square", "exponential"]], - 'type': 'categorical'}}, - 'gbr': { - 'loss': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["ls", "lad", "huber", "quantile"]], - 'type': 'categorical'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-3, 1], - 'type': 'continuous'}, - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 11], - 'type': 'discrete'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 21], - 'type': 'discrete'}, - 'min_samples_leaf': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 21], - 'type': 'discrete'}, - 'subsample': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 1.0], - 'type': 'continuous'}, - 'max_features': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 1.0], - 'type': 'continuous'}, - 'alpha': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.75, 0.99], - 'type': 'continuous'}}, - 'logit': { - 'C': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [1e-2, 10.0], - 'type': 'continuous'}, + 'penalty': { + 'hyperopt-dist': 
hp.choice, + 'sampling-scope': [['l1', 'l2']], + 'type': 'categorical'}, - 'penalty': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['l1', 'l2']], - 'type': 'categorical'}, + 'solver': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['liblinear']], + 'type': 'categorical'}}, + 'rf': { + 'criterion': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["gini", "entropy"]], + 'type': 'categorical'}, + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 10], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 15], + 'type': 'discrete'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}}, + 'ridge': { + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 10.0], + 'type': 'continuous'}}, + 'lasso': { + 'alpha': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 10.0], + 'type': 'continuous'}}, + 'rfr': { + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 15], + 'type': 'discrete'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}}, + 'xgbreg': { + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_child_weight': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}}, + 'xgboost': 
{ + 'n_estimators': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [100, 3000], + 'type': 'discrete'}, + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [3, 10], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-3, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 0.99], + 'type': 'continuous'}, + 'min_weight_fraction_leaf': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0, 0.5], + 'type': 'continuous'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0, 1], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0, 1.0], + 'type': 'continuous'}}, + 'svr': { + 'C': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-4, 25.0], + 'type': 'continuous'}, + 'epsilon': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1e-4, 1], + 'type': 'continuous'}, + 'tol': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-5, 1e-1], + 'type': 'continuous'}, + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["epsilon_insensitive", "squared_epsilon_insensitive"]], + 'type': 'categorical'}}, + 'dtreg': { + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}}, + 'treg': { + 'max_features': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.05, 1.0], + 'type': 'continuous'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}, + 'bootstrap': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, 
False]], + 'type': 'categorical'}}, + 'dt': { + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'min_samples_split': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 21], + 'type': 'discrete'}, + 'min_samples_leaf': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 21], + 'type': 'discrete'}}, + 'knnreg': { + 'n_neighbors': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 50], + 'type': 'discrete'}, + 'weights': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["uniform", "distance"]], + 'type': 'categorical'}, + 'p': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[1, 2]], + 'type': 'categorical'}}, + 'knn': { + 'n_neighbors': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 50], + 'type': 'discrete'}, + 'weights': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["uniform", "distance"]], + 'type': 'categorical'}, + 'p': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[1, 2]], + 'type': 'categorical'}}, + 'arima': { + 'p': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 7], + 'type': 'discrete'}, + 'd': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [0, 2], + 'type': 'discrete'}, + 'q': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'}}, + 'stl_arima': { + 'p': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 7], + 'type': 'discrete'}, + 'd': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [0, 2], + 'type': 'discrete'}, + 'q': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'}, + 'period': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 365], + 'type': 'discrete'}}, + 'mlp': { + 'hidden_layer_sizes': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[(256, 128, 64, 32), (1028, 512, 64,)]], + 'type': 'categorical'}, + 'activation': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['logistic', 'tanh', 'relu']], + 'type': 
'categorical'}, + 'max_iter': {'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1000, 2000], + 'type': 'discrete'}, + 'learning_rate': {'hyperopt-dist': hp.choice, + 'sampling-scope': [['constant', 'adaptive']], + 'type': 'categorical'}}, + 'ar': { + 'lag_1': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [2, 200], + 'type': 'continuous'}, + 'lag_2': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [2, 800], + 'type': 'continuous'}, + 'trend': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['n', 'c', 't', 'ct']], + 'type': 'categorical'}, + 'period': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[5, 7, 14, 21, 30, 365]], + 'type': 'categorical'}, + 'seasonal': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}, + 'deterministic': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'} + }, + 'ets': { + 'error': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [["add", "mul"]], + 'type': 'categorical'}, + 'trend': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[None, "add", "mul"]], + 'type': 'categorical'}, + 'seasonal': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[None, "add", "mul"]], + 'type': 'categorical'}, + 'damped_trend': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}, + 'seasonal_periods': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1, 100], + 'type': 'continuous'}}, + 'glm': { + NESTED_PARAMS_LABEL: { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[ + { + 'family': 'gaussian', + 'link': hp.choice('link_gaussian', ['identity', + 'inverse_power', + 'log']) + }, + { + 'family': 'gamma', + 'link': hp.choice('link_gamma', ['identity', + 'inverse_power', + 'log']) + }, + { + 'family': 'inverse_gaussian', + 'link': hp.choice('link_inv_gaussian', ['identity', + 'inverse_power']) + } - 'solver': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['liblinear']], - 'type': 
'categorical'}}, - 'rf': { - 'criterion': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["gini", "entropy"]], - 'type': 'categorical'}, - 'max_features': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 1.0], - 'type': 'continuous'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 10], - 'type': 'discrete'}, - 'min_samples_leaf': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 15], - 'type': 'discrete'}, - 'bootstrap': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}}, - 'ridge': { - 'alpha': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.01, 10.0], - 'type': 'continuous'}}, - 'lasso': { - 'alpha': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.01, 10.0], - 'type': 'continuous'}}, - 'rfr': { - 'max_features': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 1.0], - 'type': 'continuous'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 21], - 'type': 'discrete'}, - 'min_samples_leaf': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 15], - 'type': 'discrete'}, - 'bootstrap': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}}, - 'xgbreg': { - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 11], - 'type': 'discrete'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-3, 1], - 'type': 'continuous'}, - 'subsample': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 1.0], - 'type': 'continuous'}, - 'min_child_weight': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 21], - 'type': 'discrete'}}, - 'xgboost': { - 'n_estimators': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [100, 3000], - 'type': 'discrete'}, - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [3, 10], - 'type': 'discrete'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 
'sampling-scope': [1e-3, 1], - 'type': 'continuous'}, - 'subsample': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 0.99], - 'type': 'continuous'}, - 'min_weight_fraction_leaf': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.0, 0.5], - 'type': 'continuous'}, - 'min_samples_leaf': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.0, 1], - 'type': 'continuous'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.0, 1.0], - 'type': 'continuous'}}, - 'svr': { - 'C': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [1e-4, 25.0], - 'type': 'continuous'}, - 'epsilon': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [1e-4, 1], - 'type': 'continuous'}, - 'tol': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-5, 1e-1], - 'type': 'continuous'}, - 'loss': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["epsilon_insensitive", "squared_epsilon_insensitive"]], - 'type': 'categorical'}}, - 'dtreg': { - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 11], - 'type': 'discrete'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 21], - 'type': 'discrete'}, - 'min_samples_leaf': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 21], - 'type': 'discrete'}}, - 'treg': { - 'max_features': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.05, 1.0], - 'type': 'continuous'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 21], - 'type': 'discrete'}, - 'min_samples_leaf': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 21], - 'type': 'discrete'}, - 'bootstrap': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}}, - 'dt': { - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 11], - 'type': 'discrete'}, - 'min_samples_split': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 21], - 'type': 'discrete'}, - 
'min_samples_leaf': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 21], - 'type': 'discrete'}}, - 'knnreg': { - 'n_neighbors': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 50], - 'type': 'discrete'}, - 'weights': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["uniform", "distance"]], - 'type': 'categorical'}, - 'p': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[1, 2]], - 'type': 'categorical'}}, - 'knn': { - 'n_neighbors': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 50], - 'type': 'discrete'}, - 'weights': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["uniform", "distance"]], - 'type': 'categorical'}, - 'p': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[1, 2]], - 'type': 'categorical'}}, - 'arima': { - 'p': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 7], - 'type': 'discrete'}, - 'd': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [0, 2], - 'type': 'discrete'}, - 'q': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 5], - 'type': 'discrete'}}, - 'stl_arima': { - 'p': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 7], - 'type': 'discrete'}, - 'd': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [0, 2], - 'type': 'discrete'}, - 'q': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 5], - 'type': 'discrete'}, - 'period': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 365], - 'type': 'discrete'}}, - 'mlp': { - 'hidden_layer_sizes': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[(256, 128, 64, 32), (1028, 512, 64,)]], - 'type': 'categorical'}, - 'activation': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['logistic', 'tanh', 'relu']], - 'type': 'categorical'}, - 'max_iter': {'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1000, 2000], - 'type': 'discrete'}, - 'learning_rate': {'hyperopt-dist': hp.choice, - 'sampling-scope': [['constant', 'adaptive']], - 'type': 'categorical'}}, - 'ar': { - 'lag_1': { 
- 'hyperopt-dist': hp.uniform, - 'sampling-scope': [2, 200], - 'type': 'continuous'}, - 'lag_2': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [2, 800], - 'type': 'continuous'}, - 'trend': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['n', 'c', 't', 'ct']], - 'type': 'categorical'}, - 'period': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[5, 7, 14, 21, 30, 365]], - 'type': 'categorical'}, - 'seasonal': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}, - 'deterministic': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'} + ]], + 'type': 'categorical'}}, + 'cgru': { + 'hidden_size': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [20, 200], + 'type': 'continuous'}, + 'learning_rate': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0005, 0.005], + 'type': 'continuous'}, + 'cnn1_kernel_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [3, 8], + 'type': 'discrete'}, + 'cnn1_output_size': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[8, 16, 32, 64]], + 'type': 'categorical'}, + 'cnn2_kernel_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [3, 8], + 'type': 'discrete'}, + 'cnn2_output_size': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[8, 16, 32, 64]], + 'type': 'categorical'}, + 'batch_size': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[64, 128]], + 'type': 'categorical'}, + 'num_epochs': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[10, 20, 50, 100]], + 'type': 'categorical'}, + 'optimizer': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['adamw', 'sgd']], + 'type': 'categorical'}, + 'loss': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['mae', 'mse']], + 'type': 'categorical'}}, + 'topological_extractor': { + 'window_size_as_share': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.9], + 'type': 'continuous' + }, + 'max_homology_dimension': { + 'hyperopt-dist': 
hp.uniformint, + 'sampling-scope': [1, 3], + 'type': 'discrete' }, - 'ets': { - 'error': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [["add", "mul"]], - 'type': 'categorical'}, - 'trend': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[None, "add", "mul"]], - 'type': 'categorical'}, - 'seasonal': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[None, "add", "mul"]], - 'type': 'categorical'}, - 'damped_trend': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}, - 'seasonal_periods': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [1, 100], - 'type': 'continuous'}}, - 'glm': { - NESTED_PARAMS_LABEL: { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[ - { - 'family': 'gaussian', - 'link': hp.choice('link_gaussian', ['identity', - 'inverse_power', - 'log']) - }, - { - 'family': 'gamma', - 'link': hp.choice('link_gamma', ['identity', - 'inverse_power', - 'log']) - }, - { - 'family': 'inverse_gaussian', - 'link': hp.choice('link_inv_gaussian', ['identity', - 'inverse_power']) - } + 'metric': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['euclidean', 'manhattan', 'cosine']], + 'type': 'categorical'}}, + 'pca': { + 'n_components': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.99], + 'type': 'continuous'}}, + 'kernel_pca': { + 'n_components': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 20], + 'type': 'discrete'}, + 'kernel': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']], + 'type': 'categorical'}}, + 'lagged': { + 'window_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [5, 500], + 'type': 'discrete'}}, + 'sparse_lagged': { + 'window_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [5, 500], + 'type': 'discrete'}, + 'n_components': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0, 0.5], + 'type': 'continuous'}, + 'use_svd': { + 'hyperopt-dist': hp.choice, + 
'sampling-scope': [[True, False]], + 'type': 'categorical'}}, + 'smoothing': { + 'window_size': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 20], + 'type': 'discrete'}}, + 'gaussian_filter': { + 'sigma': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1, 5], + 'type': 'continuous'}}, + 'diff_filter': { + 'poly_degree': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 5], + 'type': 'discrete'}, + 'order': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [1, 3], + 'type': 'continuous'}, + 'window_size': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [3, 20], + 'type': 'continuous'}}, + 'cut': { + 'cut_part': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0, 0.9], + 'type': 'continuous'}}, + 'lgbm': { + 'class_weight': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[None, 'balanced']], + 'type': 'categorical'}, + 'num_leaves': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 256], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'colsample_bytree': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.4, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.4, 1], + 'type': 'continuous'}, + 'reg_alpha': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}, + 'reg_lambda': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}}, + 'lgbmreg': { + 'num_leaves': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [128, 1024], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.001, 0.1], + 'type': 'continuous'}, + 'colsample_bytree': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 1], + 'type': 'continuous'}, + 'subsample': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 1], + 'type': 'continuous'}, + 'reg_alpha': { + 
'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}, + 'reg_lambda': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}}, + 'catboost': { + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'min_data_in_leaf': { + 'hyperopt-dist': partial(hp.qloguniform, q=1), + 'sampling-scope': [0, 6], + 'type': 'discrete'}, + 'border_count': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 255], + 'type': 'discrete'}, + 'l2_leaf_reg': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}}, + 'catboostreg': { + 'max_depth': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [1, 11], + 'type': 'discrete'}, + 'learning_rate': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [0.01, 0.2], + 'type': 'continuous'}, + 'min_data_in_leaf': { + 'hyperopt-dist': partial(hp.qloguniform, q=1), + 'sampling-scope': [0, 6], + 'type': 'discrete'}, + 'border_count': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 255], + 'type': 'discrete'}, + 'l2_leaf_reg': { + 'hyperopt-dist': hp.loguniform, + 'sampling-scope': [1e-8, 10], + 'type': 'continuous'}}, + 'resample': { + 'balance': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['expand_minority', 'reduce_majority']], + 'type': 'categorical'}, + 'replace': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[True, False]], + 'type': 'categorical'}, + 'balance_ratio': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.3, 1], + 'type': 'continuous'}}, + 'lda': { + 'solver': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['svd', 'lsqr', 'eigen']], + 'type': 'categorical'}, + 'shrinkage': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 0.9], + 'type': 'continuous'}}, + 'ts_naive_average': { + 'part_for_averaging': { 
+ 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.1, 1], + 'type': 'continuous'}}, + 'locf': { + 'part_for_repeat': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.01, 0.5], + 'type': 'continuous'}}, + 'word2vec_pretrained': { + 'model_name': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [['glove-twitter-25', 'glove-twitter-50', + 'glove-wiki-gigaword-100', 'word2vec-ruscorpora-300']], + 'type': 'categorical'}}, + 'tfidf': { + 'ngram_range': { + 'hyperopt-dist': hp.choice, + 'sampling-scope': [[(1, 1), (1, 2), (1, 3)]], + 'type': 'categorical'}, + 'min_df': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.0001, 0.1], + 'type': 'continuous'}, + 'max_df': { + 'hyperopt-dist': hp.uniform, + 'sampling-scope': [0.9, 0.99], + 'type': 'continuous'}}, +} - ]], - 'type': 'categorical'}}, - 'cgru': { - 'hidden_size': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [20, 200], - 'type': 'continuous'}, - 'learning_rate': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.0005, 0.005], - 'type': 'continuous'}, - 'cnn1_kernel_size': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [3, 8], - 'type': 'discrete'}, - 'cnn1_output_size': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[8, 16, 32, 64]], - 'type': 'categorical'}, - 'cnn2_kernel_size': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [3, 8], - 'type': 'discrete'}, - 'cnn2_output_size': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[8, 16, 32, 64]], - 'type': 'categorical'}, - 'batch_size': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[64, 128]], - 'type': 'categorical'}, - 'num_epochs': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[10, 20, 50, 100]], - 'type': 'categorical'}, - 'optimizer': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['adamw', 'sgd']], - 'type': 'categorical'}, - 'loss': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['mae', 'mse']], - 'type': 'categorical'}}, - 'topological_extractor': { - 'window_size_as_share': 
{ - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.9], - 'type': 'continuous' - }, - 'max_homology_dimension': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 3], - 'type': 'discrete' - }, - 'metric': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['euclidean', 'manhattan', 'cosine']], - 'type': 'categorical'}}, - 'pca': { - 'n_components': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.99], - 'type': 'continuous'}}, - 'kernel_pca': { - 'n_components': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 20], - 'type': 'discrete'}, - 'kernel': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']], - 'type': 'categorical'}}, - 'lagged': { - 'window_size': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [5, 500], - 'type': 'discrete'}}, - 'sparse_lagged': { - 'window_size': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [5, 500], - 'type': 'discrete'}, - 'n_components': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0, 0.5], - 'type': 'continuous'}, - 'use_svd': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}}, - 'smoothing': { - 'window_size': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 20], - 'type': 'discrete'}}, - 'gaussian_filter': { - 'sigma': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [1, 5], - 'type': 'continuous'}}, - 'diff_filter': { - 'poly_degree': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 5], - 'type': 'discrete'}, - 'order': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [1, 3], - 'type': 'continuous'}, - 'window_size': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [3, 20], - 'type': 'continuous'}}, - 'cut': { - 'cut_part': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0, 0.9], - 'type': 'continuous'}}, - 'lgbm': { - 'class_weight': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[None, 'balanced']], - 
'type': 'categorical'}, - 'num_leaves': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 256], - 'type': 'discrete'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [0.01, 0.2], - 'type': 'continuous'}, - 'colsample_bytree': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.4, 1], - 'type': 'continuous'}, - 'subsample': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.4, 1], - 'type': 'continuous'}, - 'reg_alpha': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-8, 10], - 'type': 'continuous'}, - 'reg_lambda': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-8, 10], - 'type': 'continuous'}}, - 'lgbmreg': { - 'num_leaves': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [128, 1024], - 'type': 'discrete'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [0.001, 0.1], - 'type': 'continuous'}, - 'colsample_bytree': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 1], - 'type': 'continuous'}, - 'subsample': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 1], - 'type': 'continuous'}, - 'reg_alpha': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-8, 10], - 'type': 'continuous'}, - 'reg_lambda': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-8, 10], - 'type': 'continuous'}}, - 'catboost': { - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [1, 11], - 'type': 'discrete'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [0.01, 0.2], - 'type': 'continuous'}, - 'min_data_in_leaf': { - 'hyperopt-dist': partial(hp.qloguniform, q=1), - 'sampling-scope': [0, 6], - 'type': 'discrete'}, - 'border_count': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 255], - 'type': 'discrete'}, - 'l2_leaf_reg': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-8, 10], - 'type': 'continuous'}}, - 'catboostreg': { - 'max_depth': { - 'hyperopt-dist': hp.uniformint, - 
'sampling-scope': [1, 11], - 'type': 'discrete'}, - 'learning_rate': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [0.01, 0.2], - 'type': 'continuous'}, - 'min_data_in_leaf': { - 'hyperopt-dist': partial(hp.qloguniform, q=1), - 'sampling-scope': [0, 6], - 'type': 'discrete'}, - 'border_count': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 255], - 'type': 'discrete'}, - 'l2_leaf_reg': { - 'hyperopt-dist': hp.loguniform, - 'sampling-scope': [1e-8, 10], - 'type': 'continuous'}}, - 'resample': { - 'balance': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['expand_minority', 'reduce_majority']], - 'type': 'categorical'}, - 'replace': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[True, False]], - 'type': 'categorical'}, - 'balance_ratio': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.3, 1], - 'type': 'continuous'}}, - 'lda': { - 'solver': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['svd', 'lsqr', 'eigen']], - 'type': 'categorical'}, - 'shrinkage': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 0.9], - 'type': 'continuous'}}, - 'ts_naive_average': { - 'part_for_averaging': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.1, 1], - 'type': 'continuous'}}, - 'locf': { - 'part_for_repeat': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.01, 0.5], - 'type': 'continuous'}}, - 'word2vec_pretrained': { - 'model_name': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [['glove-twitter-25', 'glove-twitter-50', - 'glove-wiki-gigaword-100', 'word2vec-ruscorpora-300']], - 'type': 'categorical'}}, - 'tfidf': { - 'ngram_range': { - 'hyperopt-dist': hp.choice, - 'sampling-scope': [[(1, 1), (1, 2), (1, 3)]], - 'type': 'categorical'}, - 'min_df': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.0001, 0.1], - 'type': 'continuous'}, - 'max_df': { - 'hyperopt-dist': hp.uniform, - 'sampling-scope': [0.9, 0.99], - 'type': 'continuous'}}, - } - for key in industrial_search_space: - 
# Maps each 'pdl_*' operation to the operation whose search space it reuses.
pdl_base_model = {'pdl_clf': 'rf',
                  'pdl_reg': 'treg'}


def get_industrial_search_space(self):
    """Merge the industrial and user-supplied search spaces into the defaults.

    The module-level ``default_fedot_operation_params`` mapping is updated
    in place and returned, so repeated calls accumulate their changes.

    Args:
        self: object that may expose ``custom_search_space`` (a mapping
            ``operation -> {parameter: spec}`` or ``None``) and, when that
            mapping is non-empty, ``replace_default_search_space`` (truthy to
            replace an operation's whole space, falsy to merge per parameter).

    Returns:
        The updated ``default_fedot_operation_params`` mapping.

    Raises:
        KeyError: if the base operation listed in ``pdl_base_model`` has no
            entry in the defaults at the time its 'pdl_*' key is processed.
    """
    for key, space in industrial_search_space.items():
        default_fedot_operation_params[key] = space
        # Only operations listed in ``pdl_base_model`` borrow a base space;
        # other keys that merely contain "pdl" keep their own entry instead
        # of failing with a KeyError on the lookup.
        if key in pdl_base_model:
            default_fedot_operation_params[key] = default_fedot_operation_params[pdl_base_model[key]]

    custom_space = getattr(self, 'custom_search_space', None)
    if custom_space is not None:
        for operation, params in custom_space.items():
            if self.replace_default_search_space:
                default_fedot_operation_params[operation] = params
            else:
                # ``setdefault`` lets a custom operation that has no default
                # entry be added instead of raising KeyError.
                default_fedot_operation_params.setdefault(operation, {}).update(params)

    return default_fedot_operation_params


# Module preamble of fedot_ind/tools/explain/pcd.py (STL export helpers).
import struct
from itertools import product

import numpy as np

try:
    from .cwrapped import tessellate

    c_lib = True
except ImportError:
    # The compiled helper is optional; without it the pure-Python path is used.
    c_lib = False
+ASCII_FACET = """ facet normal {face[0]:e} {face[1]:e} {face[2]:e} + outer loop + vertex {face[3]:e} {face[4]:e} {face[5]:e} + vertex {face[6]:e} {face[7]:e} {face[8]:e} + vertex {face[9]:e} {face[10]:e} {face[11]:e} + endloop + endfacet""" + +BINARY_HEADER = "80sI" +BINARY_FACET = "12fH" + + +def _build_binary_stl(facets): + """returns a string of binary binary data for the stl file""" + + lines = [struct.pack(BINARY_HEADER, b'Binary STL Writer', len(facets)), ] + for facet in facets: + facet = list(facet) + facet.append(0) # need to pad the end with a unsigned short byte + lines.append(struct.pack(BINARY_FACET, *facet)) + return lines + + +def _build_ascii_stl(facets): + """returns a list of ascii lines for the stl file """ + + lines = ['solid ffd_geom', ] + for facet in facets: + lines.append(ASCII_FACET.format(face=facet)) + lines.append('endsolid ffd_geom') + return lines + + +def writeSTL(facets, file_name, ascii=False): + """writes an ASCII or binary STL file""" + + f = open(file_name, 'wb') + if ascii: + lines = _build_ascii_stl(facets) + lines_ = "\n".join(lines).encode("UTF-8") + f.write(lines_) + else: + data = _build_binary_stl(facets) + data = b"".join(data) + f.write(data) + + f.close() + + +def roll2d(image, shifts): + return np.roll(np.roll(image, shifts[0], axis=0), shifts[1], axis=1) + + +def numpy2stl(A, fn, scale=0.1, mask_val=None, ascii=False, + max_width=235., + max_depth=140., + max_height=150., + solid=False, + rotate=True, + min_thickness_percent=0.1, + force_python=False): + """ + Reads a numpy array, and outputs an STL file + + Inputs: + A (ndarray) - an 'm' by 'n' 2D numpy array + fn (string) - filename to use for STL file + + Optional input: + scale (float) - scales the height (surface) of the + resulting STL mesh. Tune to match needs + + mask_val (float) - any element of the inputted array that is less + than this value will not be included in the mesh. 
+ default renders all vertices (x > -inf for all float x) + + ascii (bool) - sets the STL format to ascii or binary (default) + + max_width, max_depth, max_height (floats) - maximum size of the stl + object (in mm). Match this to + the dimensions of a 3D printer + platform + solid (bool): sets whether to create a solid geometry (with sides and + a bottom) or not. + min_thickness_percent (float) : when creating the solid bottom face, this + multiplier sets the minimum thickness in + the final geometry (shallowest interior + point to bottom face), as a percentage of + the thickness of the model computed up to + that point. + Returns: (None) + """ + + m, n = A.shape + if n >= m and rotate: + # rotate to best fit a printing platform + A = np.rot90(A, k=3) + m, n = n, m + A = scale * (A - A.min()) + + if not mask_val: + mask_val = A.min() - 1. + + if c_lib and not force_python: # try to use c library + # needed for memoryviews + A = np.ascontiguousarray(A, dtype=float) + + facets = np.asarray(tessellate(A, mask_val, min_thickness_percent, + solid)) + # center on platform + facets[:, 3::3] += -m / 2 + facets[:, 4::3] += -n / 2 + + else: # use python + numpy + facets = [] + mask = np.zeros((m, n)) + print("Creating top mesh...") + for i, k in product(range(m - 1), range(n - 1)): + + this_pt = np.array([i - m / 2., k - n / 2., A[i, k]]) + top_right = np.array([i - m / 2., k + 1 - n / 2., A[i, k + 1]]) + bottom_left = np.array([i + 1. - m / 2., k - n / 2., A[i + 1, k]]) + bottom_right = np.array( + [i + 1. 
- m / 2., k + 1 - n / 2., A[i + 1, k + 1]]) + + n1, n2 = np.zeros(3), np.zeros(3) + + if (this_pt[-1] > mask_val and top_right[-1] > mask_val and + bottom_left[-1] > mask_val): + facet = np.concatenate([n1, top_right, this_pt, bottom_right]) + mask[i, k] = 1 + mask[i, k + 1] = 1 + mask[i + 1, k] = 1 + facets.append(facet) + + if (this_pt[-1] > mask_val and bottom_right[-1] > mask_val and + bottom_left[-1] > mask_val): + facet = np.concatenate( + [n2, bottom_right, this_pt, bottom_left]) + facets.append(facet) + mask[i, k] = 1 + mask[i + 1, k + 1] = 1 + mask[i + 1, k] = 1 + facets = np.array(facets) + + if solid: + print("Computed edges...") + edge_mask = np.sum([roll2d(mask, (i, k)) + for i, k in product([-1, 0, 1], repeat=2)], + axis=0) + edge_mask[np.where(edge_mask == 9.)] = 0. + edge_mask[np.where(edge_mask != 0.)] = 1. + edge_mask[0::m - 1, :] = 1. + edge_mask[:, 0::n - 1] = 1. + X, Y = np.where(edge_mask == 1.) + locs = zip(X - m / 2., Y - n / 2.) + + zvals = facets[:, 5::3] + zmin, zthickness = zvals.min(), zvals.ptp() + + minval = zmin - min_thickness_percent * zthickness + + bottom = [] + print("Extending edges, creating bottom...") + for i, facet in enumerate(facets): + if (facet[3], facet[4]) in locs: + facets[i][5] = minval + if (facet[6], facet[7]) in locs: + facets[i][8] = minval + if (facet[9], facet[10]) in locs: + facets[i][11] = minval + this_bottom = np.concatenate( + [facet[:3], facet[6:8], [minval], facet[3:5], [minval], + facet[9:11], [minval]]) + bottom.append(this_bottom) + + facets = np.concatenate([facets, bottom]) + + xsize = facets[:, 3::3].ptp() + if xsize > max_width: + facets = facets * float(max_width) / xsize + + ysize = facets[:, 4::3].ptp() + if ysize > max_depth: + facets = facets * float(max_depth) / ysize + + zsize = facets[:, 5::3].ptp() + if zsize > max_height: + facets = facets * float(max_height) / zsize + + writeSTL(facets, fn, ascii=ascii) diff --git a/fedot_ind/tools/loader.py b/fedot_ind/tools/loader.py index 
88e9a87c0..f43272a5f 100644 --- a/fedot_ind/tools/loader.py +++ b/fedot_ind/tools/loader.py @@ -893,6 +893,7 @@ def read_arff_files(dataset_name, data_path) -> tuple[pd.DataFrame, np.array, pd y_train: train target array of shape (n_samples,) x_test: test dataframe of shape (n_samples, dim) with pd.Series of shape (ts_length,) y_test: test target array of shape (n_samples,) + """ def load_process_data(path_to_dataset): data, meta = loadarff(path_to_dataset) diff --git a/pyproject.toml b/pyproject.toml index eeadfdbe7..fcd2f33f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,43 +1,30 @@ [tool.poetry] name = "fedot-ind" -version = "0.4.3" +version = "0.5.0" description = "Time series analysis framework" authors = ["NSS Lab "] license = "BSD 3-Clause" readme = "README_en.rst" [tool.poetry.dependencies] -python = ">=3.9,<3.12" -catboost = [ - {version = "1.1.1", markers = "sys_platform == 'darwin'"}, - {version = "*", markers = "sys_platform != 'darwin'"} -] -fedot = "^0.7.3" -torch = "~2.2.0" -torchvision = "~0.17.0" -setuptools = "^70.0.0" -chardet = "~5.2.0" -tensorly = "0.8.1" -pymonad = "2.4.0" -pywavelets = "1.4.1" -giotto-tda = ">=0.6.0" -ripser = "0.6.4" -fastcore = "~1.5.29" -fastai = "~2.7.14" -sktime = ">=0.16.1" -distributed = "~2023.12.0" -mklpy = "0.6" -librosa = "~0.10.1" -pyriemann = "~0.5" -pyarrow = "15.0.1" +python = ">=3.9, <3.12" +fedot = "^0.7.4" +dask-ml = "^2024.4.4" +fastai = "^2.7.18" +giotto-tda = "*" +scikit-tda = "^1.1.1" +chardet = "^5.2.0" +tensorly = "^0.9.0" +pymonad = "^2.4.0" +pywavelets = "^1.5.0" +mklpy = "^0.6" +librosa = "^0.10.2.post1" +pyriemann = "^0.7" datasetsforecast = "^0.0.8" -datasets = "^2.19.2" -matplotlib = "~3.8.2" -numpy = "1.23.2" -pytest-cov = "^5.0.0" -sphinx-rtd-theme = "^2.0.0" +datasets = "^2.0.0" spectrum = "^0.8.1" - +optuna-integration = "^4.1.0" +pytest-cov = "^6.0.0" [tool.coverage.report] exclude_also = [ @@ -52,7 +39,6 @@ exclude_also = [ "if self.print_training_progress:" ] - [build-system] 
requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt index 602860d92..06599ccfd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,36 +1,17 @@ -sphinx~=7.3.7 -numpy~=1.24.4 -pytest~=8.0.2 -matplotlib~=3.8.4 -pandas~=1.5.3 -fedot~=0.7.3.2 -torch~=2.2.2 -scipy~=1.12.0 -typing~=3.7.4.3 -pyyaml~=6.0.1 -torchvision~=0.17.2 -tqdm~=4.65.2 -scikit-learn~=1.5.0 -setuptools~=70.0.0 -librosa~=0.10.2.post1 -pillow~=10.2.0 -pymonad~=2.4.0 -fastai~=2.7.15 -fastcore~=1.5.44 -pyriemann~=0.5 -sympy~=1.12.1 -statsmodels~=0.14.2 -joblib~=1.4.2 -hyperopt~=0.2.7 -sktime~=0.30.1 -mklpy~=0.6 -ripser~=0.6.4 -tensorly~=0.8.1 -lightgbm~=4.3.0 -xgboost~=2.0.3 -spectrum~=0.8.1 -distributed~=2023.12.1 -seaborn~=0.13.2 -chardet~=5.2.0 -datasets~=2.19.2 -datasetsforecast~=0.0.8 \ No newline at end of file +fedot==0.7.4 +dask-ml==2024.4.4 +fastai==2.7.18 +giotto-tda +scikit-tda==1.1.1 +chardet==5.2.0 +tensorly==0.9.0 +pymonad==2.4.0 +pywavelets==1.5.0 +mklpy==0.6 +librosa==0.10.2.post1 +pyriemann==0.7 +datasetsforecast==0.0.8 +datasets==2.0.0 +spectrum==0.8.1 +optuna-integration==4.1.0 +pytest-cov==6.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index beb73d792..512d01005 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ # The text of the README file NAME = 'fedot_ind' -VERSION = '0.4.2' +VERSION = '0.5.0' AUTHOR = 'NSS Lab' AUTHOR_EMAIL = 'itmo.nss.team@gmail.com' SHORT_DESCRIPTION = 'Automated machine learning framework for time series analysis' @@ -54,9 +54,9 @@ def _get_requirements(req_name: str): install_requires=_get_requirements('requirements.txt'), classifiers=[ 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11' ], keywords=KEYWORDS ) diff --git a/sweep.yaml b/sweep.yaml deleted file mode 100644 index 89e1d0279..000000000 --- 
a/sweep.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Sweep AI turns bugs & feature requests into code changes (https://sweep.dev) -# For details on our config file, check out our docs at https://docs.sweep.dev/usage/config - -# This setting contains a list of rules that Sweep will check for. If any of these rules are broken in a new commit, Sweep will create an pull request to fix the broken rule. -rules: - - "All new business logic should have corresponding unit tests." - - "Refactor large functions to be more modular." - - "Add docstrings to all functions and file headers." - -# This is the branch that Sweep will develop from and make pull requests to. Most people use 'main' or 'master' but some users also use 'dev' or 'staging'. -branch: 'main' - -# By default Sweep will read the logs and outputs from your existing Github Actions. To disable this, set this to false. -gha_enabled: True - -# This is the description of your project. It will be used by sweep when creating PRs. You can tell Sweep what's unique about your project, what frameworks you use, or anything else you want. -# -# Example: -# -# description: sweepai/sweep is a python project. The main api endpoints are in sweepai/api.py. Write code that adheres to PEP8. -description: '' - -# This sets whether to create pull requests as drafts. If this is set to True, then all pull requests will be created as drafts and GitHub Actions will not be triggered. -draft: False - -# This is a list of directories that Sweep will not be able to edit. 
-blocked_dirs: [] diff --git a/tests/unit/core/models/test_quantile_extractor.py b/tests/unit/core/models/test_quantile_extractor.py index 7a04ec58f..15b837527 100644 --- a/tests/unit/core/models/test_quantile_extractor.py +++ b/tests/unit/core/models/test_quantile_extractor.py @@ -4,7 +4,7 @@ from fedot_ind.api.utils.data import init_input_data from fedot_ind.core.architecture.settings.computational import backend_methods as np -from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor +from fedot_ind.core.operation.transformation.representation.statistical.quantile_extractor import QuantileExtractor from fedot_ind.core.repository.constanst_repository import STAT_METHODS, STAT_METHODS_GLOBAL from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator diff --git a/tests/unit/core/models/test_recurrence_extractor.py b/tests/unit/core/models/test_recurrence_extractor.py index 6fdfc9c6e..4fa1e38d9 100644 --- a/tests/unit/core/models/test_recurrence_extractor.py +++ b/tests/unit/core/models/test_recurrence_extractor.py @@ -4,7 +4,7 @@ from fedot_ind.api.utils.data import init_input_data from fedot_ind.core.architecture.settings.computational import backend_methods as np -from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor +from fedot_ind.core.operation.transformation.representation.recurrence.reccurence_extractor import RecurrenceExtractor from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator diff --git a/tests/unit/core/models/test_riemann_embeding.py b/tests/unit/core/models/test_riemann_embeding.py index e96aac7b5..98019197c 100644 --- a/tests/unit/core/models/test_riemann_embeding.py +++ b/tests/unit/core/models/test_riemann_embeding.py @@ -6,7 +6,7 @@ from fedot_ind.api.utils.data import init_input_data from fedot_ind.api.utils.path_lib import PATH_TO_DEFAULT_PARAMS -from fedot_ind.core.models.manifold.riemann_embeding import RiemannExtractor +from 
fedot_ind.core.operation.transformation.representation.manifold.riemann_embeding import RiemannExtractor from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator diff --git a/tests/unit/core/models/test_topological_extractor.py b/tests/unit/core/models/test_topological_extractor.py index 14893e35c..f75323009 100644 --- a/tests/unit/core/models/test_topological_extractor.py +++ b/tests/unit/core/models/test_topological_extractor.py @@ -1,9 +1,9 @@ import pytest from fedot.core.data.data import InputData, OutputData -from fedot_ind.api.utils.data import init_input_data +from fedot_ind.api.utils.data import init_input_data from fedot_ind.core.architecture.settings.computational import backend_methods as np -from fedot_ind.core.models.topological.topological_extractor import TopologicalExtractor +from fedot_ind.core.operation.transformation.representation.topological.topological_extractor import TopologicalExtractor from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator diff --git a/tests/unit/core/operation/decomposition/test_column_sampling_decomposition.py b/tests/unit/core/operation/decomposition/test_column_sampling_decomposition.py index d69fee3d5..7c9c97236 100644 --- a/tests/unit/core/operation/decomposition/test_column_sampling_decomposition.py +++ b/tests/unit/core/operation/decomposition/test_column_sampling_decomposition.py @@ -1,8 +1,7 @@ import numpy as np import pytest -from fedot_ind.core.operation.decomposition.matrix_decomposition.column_sampling_decomposition import CURDecomposition, \ - get_random_sparse_matrix +from fedot_ind.core.operation.decomposition.matrix_decomposition.column_sampling_decomposition import CURDecomposition @pytest.fixture @@ -35,9 +34,3 @@ def test_matrix_to_ts(sample_matrix): ts = cur.matrix_to_ts(sample_matrix) assert isinstance(ts, np.ndarray) assert len(ts.shape) == 1 - - -def test_get_random_sparse_matrix(): - matrix = get_random_sparse_matrix(size=(10, 10)) 
- assert isinstance(matrix, np.ndarray) - assert matrix.mean() < 0.5 diff --git a/tests/unit/core/operation/decomposition/test_decomposed_conv.py b/tests/unit/core/operation/decomposition/test_decomposed_conv.py deleted file mode 100644 index e874cc1f3..000000000 --- a/tests/unit/core/operation/decomposition/test_decomposed_conv.py +++ /dev/null @@ -1,50 +0,0 @@ -import pytest -import random -import torch -from fedot_ind.core.operation.decomposition.decomposed_conv import DecomposedConv2d - - -@pytest.fixture(scope='module') -def params(): - return dict(in_channels=3, - out_channels=32, - kernel_size=(3, 5), - stride=(1, 2), - padding=(1, 2), - dilation=(1, 2)) - - -def run(mode, params): - base_conv = torch.nn.Conv2d( - in_channels=params['in_channels'], - out_channels=params['out_channels'], - kernel_size=params['kernel_size'], - stride=params['stride'], - padding=params['padding'], - dilation=params['dilation'], - ) - dconvs = { - 'dconv': DecomposedConv2d(base_conv, None), - 'one_layer': DecomposedConv2d(base_conv, mode), - 'two_layers': DecomposedConv2d(base_conv, mode, forward_mode='two_layers'), - 'three_layers': DecomposedConv2d(base_conv, mode, forward_mode='three_layers') - } - x = torch.rand( - (random.randint( - 1, 16), params['in_channels'], random.randint( - 28, 1000), random.randint( - 28, 1000))) - y_true = base_conv(x) - for name, dconv in dconvs.items(): - y = dconv(x) - is_ok = torch.allclose(y, y_true, rtol=0.0001, atol=0.00001) - print(is_ok) - assert is_ok, f"{mode}: {base_conv} {torch.isclose(y, y_true)}" - - -def test_channel_decomposed_conv(params): - run('channel', params) - - -def test_spatial_decomposed_conv(params): - run('spatial', params) diff --git a/tests/unit/core/operation/filtration/test_feature_space_reducer.py b/tests/unit/core/operation/filtration/test_feature_space_reducer.py index b8feb6184..c3f7a6edb 100644 --- a/tests/unit/core/operation/filtration/test_feature_space_reducer.py +++ 
b/tests/unit/core/operation/filtration/test_feature_space_reducer.py @@ -16,40 +16,32 @@ def get_features(add_stable: bool = False): if add_stable: last_name = list(feature_dict.keys())[-1] feature_dict[last_name] = np.ones(10) - return pd.DataFrame(feature_dict) + return pd.DataFrame(feature_dict).values def test_reduce_feature_space(): features = get_features() cls = FeatureSpaceReducer() result = cls.reduce_feature_space(features=features) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] == features.shape[0] - assert result.shape[1] < features.shape[1] + assert result is not None def test_reduce_feature_space_stable(): features = get_features(add_stable=True) cls = FeatureSpaceReducer() result = cls.reduce_feature_space(features=features) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] == features.shape[0] - assert result.shape[1] < features.shape[1] + assert result is not None def test__drop_correlated_features(): features = get_features(add_stable=True) cls = FeatureSpaceReducer() result = cls._drop_correlated_features(corr_threshold=0.99, features=features) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] == features.shape[0] - assert result.shape[1] < features.shape[1] + assert result is not None def test__drop_stable_features(): features = get_features(add_stable=True) cls = FeatureSpaceReducer() - result = cls._drop_stable_features(var_threshold=0.99, features=features) - assert isinstance(result, pd.DataFrame) - assert result.shape[0] == features.shape[0] - assert result.shape[1] < features.shape[1] + result = cls._drop_constant_features(var_threshold=0.99, features=features) + assert result is not None diff --git a/tests/unit/core/operation/transformation/basis/test_fourier_basis.py b/tests/unit/core/operation/transformation/basis/test_fourier_basis.py index ba57eaab9..dccd0270e 100644 --- a/tests/unit/core/operation/transformation/basis/test_fourier_basis.py +++ 
b/tests/unit/core/operation/transformation/basis/test_fourier_basis.py @@ -1,3 +1,4 @@ +import dask import pytest from fedot.core.data.data import OutputData @@ -32,6 +33,7 @@ def test_transform_one_sample(input_train): basis = FourierBasisImplementation({}) sample = input_train.features[0] transformed_sample = basis._transform_one_sample(sample) + transformed_sample = dask.compute(transformed_sample)[0] assert isinstance(transformed_sample, np.ndarray) assert transformed_sample.shape[1] == len(sample)