diff --git a/examples/real_world_examples/benchmark_example/analysis of results/pdl_uni_benchmark.ipynb b/examples/real_world_examples/benchmark_example/analysis of results/pdl_uni_benchmark.ipynb new file mode 100644 index 000000000..2a9926639 --- /dev/null +++ b/examples/real_world_examples/benchmark_example/analysis of results/pdl_uni_benchmark.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from fedot_ind.tools.serialisation.path_lib import PROJECT_PATH\n", + "from fedot_ind.core.repository.constanst_repository import UNI_CLF_BENCH\n", + "import pandas as pd\n", + "from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, DEFAULT_CLF_AUTOML_CONFIG\n", + "from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "METRIC_NAMES = ('f1', 'accuracy', 'precision', 'roc_auc')\n", + "COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG\n", + "AUTOML_LEARNING_STRATEGY = dict(timeout=2,\n", + " pop_size=10,\n", + " n_jobs=-1,\n", + " num_of_generations=15)\n", + "\n", + "LEARNING_CONFIG = {'learning_strategy': 'from_scratch',\n", + " 'learning_strategy_params': AUTOML_LEARNING_STRATEGY,\n", + " 'optimisation_loss': {'quality_loss': 'accuracy'}}\n", + "\n", + "INDUSTRIAL_CONFIG = {'problem': 'classification'}\n", + "\n", + "API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,\n", + " 'automl_config': DEFAULT_CLF_AUTOML_CONFIG,\n", + " 'learning_config': LEARNING_CONFIG,\n", + " 'compute_config': COMPUTE_CONFIG}\n", + "BENCHMARK_PATH = PROJECT_PATH + '/examples/real_world_examples/benchmark_example/classification/UCR_UNI_23_01_25'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "def filter_datasets(UNI_CLF_BENCH, max_classes: int = 10, max_samples: int = 1000):\n", + " UNI_CLF_BENCH_METADATA = pd.read_csv(PROJECT_PATH + '/fedot_ind/core/repository/data/ts_benchmark_metadata.csv')\n", + " datasets_filtred_by_classes = UNI_CLF_BENCH_METADATA[UNI_CLF_BENCH_METADATA['Class'] <= max_classes]\n", + " datasets_filtred_by_samples = datasets_filtred_by_classes[datasets_filtred_by_classes['Train ']\n", + " <= max_samples]\n", + " datasets_filtred_by_samples = datasets_filtred_by_samples[datasets_filtred_by_samples['Test ']\n", + " <= max_samples]['Name'].values.tolist()\n", + " UNI_CLF_BENCH = [x for x in UNI_CLF_BENCH if x in datasets_filtred_by_samples ]\n", + " UNI_CLF_BENCH_METADATA = UNI_CLF_BENCH_METADATA[UNI_CLF_BENCH_METADATA['Name'].isin(datasets_filtred_by_samples)]\n", + " return UNI_CLF_BENCH, UNI_CLF_BENCH_METADATA" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "UNI_CLF_BENCH, UNI_CLF_BENCH_METADATA = filter_datasets(UNI_CLF_BENCH)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "api_agent = ApiTemplate(api_config=API_CONFIG, metric_list=METRIC_NAMES)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "bench_results = api_agent.load_result(benchmark_path=BENCHMARK_PATH)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "df_list = list(bench_results.values())" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "new_df=pd.merge(\n", + " left=df_list[0],\n", + " right=df_list[1],\n", + " how='left',\n", + " left_on=['dataset'],\n", + " right_on=['dataset'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": " accuracy_pdl_rf f1_pdl_rf precision_pdl_rf dataset \\\n0 0.120 0.120 0.120 ACSF1 \n1 0.497 0.509 0.516 ArrowHead \n2 0.167 0.145 0.132 Beef \n3 0.400 0.400 0.400 BeetleFly \n4 0.500 0.000 0.250 BirdChicken \n5 0.280 0.285 0.299 BME \n6 0.317 0.261 0.246 Car \n7 0.226 0.123 0.084 CBF \n8 0.254 0.158 0.294 Chinatown \n9 0.429 0.429 0.431 Coffee \n10 0.432 0.437 0.432 Computers \n11 0.405 0.354 0.263 DiatomSizeReduction \n\n accuracy_rf f1_rf precision_rf \n0 0.120 0.120 0.123 \n1 0.331 0.317 0.312 \n2 0.133 0.124 0.117 \n3 0.450 0.353 0.445 \n4 0.600 0.556 0.604 \n5 0.300 0.288 0.285 \n6 0.267 0.274 0.318 \n7 0.336 0.169 0.112 \n8 0.813 0.714 0.775 \n9 0.929 0.923 0.928 \n10 0.796 0.792 0.796 \n11 0.892 0.844 0.680 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
accuracy_pdl_rff1_pdl_rfprecision_pdl_rfdatasetaccuracy_rff1_rfprecision_rf
00.1200.1200.120ACSF10.1200.1200.123
10.4970.5090.516ArrowHead0.3310.3170.312
20.1670.1450.132Beef0.1330.1240.117
30.4000.4000.400BeetleFly0.4500.3530.445
40.5000.0000.250BirdChicken0.6000.5560.604
50.2800.2850.299BME0.3000.2880.285
60.3170.2610.246Car0.2670.2740.318
70.2260.1230.084CBF0.3360.1690.112
80.2540.1580.294Chinatown0.8130.7140.775
90.4290.4290.431Coffee0.9290.9230.928
100.4320.4370.432Computers0.7960.7920.796
110.4050.3540.263DiatomSizeReduction0.8920.8440.680
\n
" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/real_world_examples/benchmark_example/classification/PDL_uni.py b/examples/real_world_examples/benchmark_example/classification/PDL_uni.py index a6e801768..f23ac57ce 100644 --- a/examples/real_world_examples/benchmark_example/classification/PDL_uni.py +++ b/examples/real_world_examples/benchmark_example/classification/PDL_uni.py @@ -1,6 +1,25 @@ +import os + +import pandas as pd + from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, DEFAULT_CLF_AUTOML_CONFIG from fedot_ind.core.repository.constanst_repository import UNI_CLF_BENCH +from fedot_ind.tools.serialisation.path_lib import PROJECT_PATH + + +def filter_datasets(UNI_CLF_BENCH, max_classes: int = 10, max_samples: int = 1000): + UNI_CLF_BENCH_METADATA = pd.read_csv(PROJECT_PATH + '/fedot_ind/core/repository/data/ts_benchmark_metadata.csv') + datasets_filtred_by_classes = UNI_CLF_BENCH_METADATA[UNI_CLF_BENCH_METADATA['Class'] <= max_classes] + datasets_filtred_by_samples = datasets_filtred_by_classes[datasets_filtred_by_classes['Train '] + <= max_samples] + datasets_filtred_by_samples = datasets_filtred_by_samples[datasets_filtred_by_samples['Test '] + <= max_samples]['Name'].values.tolist() + already_eval = os.listdir('./UCR_UNI_23_01_25/rf') + UNI_CLF_BENCH = [x for x in UNI_CLF_BENCH if x in datasets_filtred_by_samples and x not in already_eval] + UNI_CLF_BENCH_METADATA = UNI_CLF_BENCH_METADATA[UNI_CLF_BENCH_METADATA['Name'].isin(datasets_filtred_by_samples)] + return UNI_CLF_BENCH, UNI_CLF_BENCH_METADATA + model_to_compare = [{0: ['quantile_extractor', 'rf']}, {0: ['quantile_extractor', 'pdl_clf']} @@ -8,7 +27,8 @@ model_name = ['rf', 'pdl_rf'] finutune_existed_model = [True, True] BENCHMARK = 'UCR_UNI' -BENCHMARK_PARAMS = {'experiment_date': '22_01_25', +UNI_CLF_BENCH, UNI_CLF_BENCH_METADATA = filter_datasets(UNI_CLF_BENCH) +BENCHMARK_PARAMS = {'experiment_date': '23_01_25', 'metadata': None, 'datasets': UNI_CLF_BENCH, 'model_to_compare': (model_to_compare, model_name, finutune_existed_model)} diff --git a/examples/real_world_examples/benchmark_example/detection/ts_anomaly_detection_skab_bench.py b/examples/real_world_examples/benchmark_example/detection/ts_anomaly_detection_skab_bench.py new file mode 100644 index 000000000..7b7ec0960 --- /dev/null +++ b/examples/real_world_examples/benchmark_example/detection/ts_anomaly_detection_skab_bench.py @@ -0,0 +1,71 @@ +import os + +from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate +from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, DEFAULT_CLF_AUTOML_CONFIG +from tools.test_load_data import EXAMPLES_DATA_PATH + + +def prepare_skab_benchmark(): + ENCODER_LEARNING_PARAMS = {'epochs': 150, + 'lr': 0.001, + 'device': 'cpu' + } + model_to_compare = [ + # {0: ['iforest_detector']}, + {0: [('conv_ae_detector', ENCODER_LEARNING_PARAMS)]}, + # {0: ['stat_detector']}, + # {} + ] + model_name = [ + # 'iforest', + 'conv_encoder', + # 'stat_detector', + # 'industrial' + ] + finutune_existed_model = [ + True, + True, + # True, False + ] + BENCHMARK = 'SKAB' + folder = 'valve1' + datasets = os.listdir(EXAMPLES_DATA_PATH + f'/benchmark/detection/data/{folder}') + datasets = [x.split('.')[0] for x in datasets] + BENCHMARK_PARAMS = {'experiment_date': '23_01_25', + 'metadata': {'folder': folder}, + 'datasets': datasets, + 'model_to_compare': (model_to_compare, model_name, finutune_existed_model)} + return BENCHMARK, BENCHMARK_PARAMS + + +METRIC_NAMES = ('nab', 'accuracy') +EVAL_REGIME = True + +COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG +AUTOML_CONFIG = DEFAULT_CLF_AUTOML_CONFIG +AUTOML_LEARNING_STRATEGY = dict(timeout=1, + n_jobs=2, + pop_size=10, + logging_level=0) + +LEARNING_CONFIG = {'learning_strategy': 'from_scratch', + 'learning_strategy_params': AUTOML_LEARNING_STRATEGY, + 'optimisation_loss': {'quality_loss': 'accuracy'}} + +INDUSTRIAL_CONFIG = {'strategy': 'anomaly_detection', + 'problem': 'classification', + 'strategy_params': {'detection_window': 10, + 'train_data_size': 'anomaly-free', + 'data_type': 'time_series'}} + +API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG, + 'automl_config': AUTOML_CONFIG, + 'learning_config': LEARNING_CONFIG, + 'compute_config': COMPUTE_CONFIG} + +if __name__ == "__main__": + api_agent = ApiTemplate(api_config=API_CONFIG, metric_list=METRIC_NAMES) + BENCHMARK, BENCHMARK_PARAMS = prepare_skab_benchmark() + if EVAL_REGIME: + api_agent.evaluate_benchmark(benchmark_name=BENCHMARK, + benchmark_params=BENCHMARK_PARAMS) diff --git a/examples/real_world_examples/benchmark_example/forecasting/time_series_uni_forecast_m4.py b/examples/real_world_examples/benchmark_example/forecasting/time_series_uni_forecast_m4.py index 00d9b704a..0b5b6093c 100644 --- a/examples/real_world_examples/benchmark_example/forecasting/time_series_uni_forecast_m4.py +++ b/examples/real_world_examples/benchmark_example/forecasting/time_series_uni_forecast_m4.py @@ -1,3 +1,5 @@ +import os + from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG from fedot_ind.core.repository.constanst_repository import M4_FORECASTING_BENCH, M4_FORECASTING_LENGTH @@ -6,10 +8,6 @@ 'lr': 0.001, 'device': 'cpu' } -# composite = {0: ['lagged', 'ridge'], -# 1: ['ar'], -# 'head': ['bagging']} -# linear = {0:['smoothing','ar']} model_to_compare = [ {0: ['smoothing', 'lagged', 'ridge']}, {}, @@ -19,9 +17,9 @@ model_name = ['lagged_regression', 'industrial', 'deepar', 'ar'] finutune_existed_model = [True, False, True, True] BENCHMARK = 'M4' -EVALUATED = ['D1002', 'D1019', 'D1091', 'D1032'] +EVALUATED = [] DATASETS = [x for x in M4_FORECASTING_BENCH if x not in EVALUATED] -BENCHMARK_PARAMS = {'experiment_date': '22_01_25', +BENCHMARK_PARAMS = {'experiment_date': '23_01_25', 'metadata': M4_FORECASTING_LENGTH, 'datasets': DATASETS, 'model_to_compare': (model_to_compare, model_name, finutune_existed_model)} @@ -54,5 +52,15 @@ if __name__ == "__main__": api_agent = ApiTemplate(api_config=API_CONFIG, metric_list=('rmse', 'mae')) if EVAL_REGIME: - api_agent.evaluate_benchmark(benchmark_name=BENCHMARK, - benchmark_params=BENCHMARK_PARAMS) + for attempt in range(100): + try: + EVALUATED = os.listdir('./M4_23_01_25/ar') + DATASETS = [x for x in M4_FORECASTING_BENCH if x not in EVALUATED] + BENCHMARK_PARAMS = {'experiment_date': '23_01_25', + 'metadata': M4_FORECASTING_LENGTH, + 'datasets': DATASETS, + 'model_to_compare': (model_to_compare, model_name, finutune_existed_model)} + api_agent.evaluate_benchmark(benchmark_name=BENCHMARK, + benchmark_params=BENCHMARK_PARAMS) + except Exception: + print('ERROR') diff --git a/fedot_ind/core/architecture/pipelines/abstract_pipeline.py b/fedot_ind/core/architecture/pipelines/abstract_pipeline.py index 49a3c312a..30f9b764d 100644 --- a/fedot_ind/core/architecture/pipelines/abstract_pipeline.py +++ b/fedot_ind/core/architecture/pipelines/abstract_pipeline.py @@ -1,6 +1,7 @@ import os from typing import Union +import numpy as np import pandas as pd from fedot.core.data.data import InputData from fedot.core.pipelines.pipeline_builder import PipelineBuilder @@ -13,6 +14,7 @@ from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels from fedot_ind.core.repository.model_repository import NEURAL_MODEL from fedot_ind.tools.loader import DataLoader +from tools.test_load_data import EXAMPLES_DATA_PATH BENCHMARK = 'M4' @@ -118,7 +120,8 @@ def __init__(self, def _prepare_dataset(self, dataset): dataset_is_dict = isinstance(dataset, dict) industrial_config = self.api_config.get('industrial_config', {}) - have_specified_industrial_strategy = 'strategy' in industrial_config.keys() + have_specified_industrial_strategy = 'strategy' in industrial_config.keys() \ + or 'strategy_params' in industrial_config.keys() if have_specified_industrial_strategy: custom_dataset_strategy = industrial_config['strategy'] @@ -129,7 +132,7 @@ def _prepare_dataset(self, dataset): train_data, test_data = Either(value=dataset, monoid=[dataset, - dataset_is_dict]). \ + dataset_is_dict or have_specified_industrial_strategy]). \ either(left_function=loader.load_data, right_function=lambda dataset_dict: loader.load_custom_data(custom_dataset_strategy)) return train_data, test_data @@ -166,27 +169,34 @@ def eval(self, train_data=self.train_data, model_to_tune=pipeline_to_tune, tuning_params={ - 'tuning_timeout': 5}), - not finetune]). either( + 'tuning_timeout': 3}), + not finetune]).either( left_function=lambda tuning_data: self.industrial_class.finetune( **tuning_data, return_only_fitted=return_only_fitted), right_function=self.industrial_class.fit) return self._get_result(self.test_data) - def load_result(self, benchmark_path, benchmark_dict: dict): + def load_result(self, benchmark_path): dir_list = os.listdir(benchmark_path) result_dict = {} for model_dir in dir_list: - datasets_dir = os.listdir(model_dir) - df_with_results = [pd.read_csv(f'{dataset}/metrics.csv') for dataset in datasets_dir] - df_with_results = pd.concat(df_with_results, ignore_index=True) + datasets_dir = os.listdir(f'{benchmark_path}/{model_dir}') + df_with_results = [pd.read_csv(f'{benchmark_path}/{model_dir}/{dataset}/metrics.csv') + for dataset in datasets_dir] + df_with_results = pd.concat(df_with_results) + del df_with_results['Unnamed: 0'] + df_with_results.columns = [f'{x}_{model_dir}' for x in df_with_results.columns] + df_with_results['dataset'] = datasets_dir result_dict.update({model_dir: df_with_results}) + return result_dict def evaluate_benchmark(self, benchmark_name, benchmark_params: dict): for dataset in benchmark_params['datasets']: if benchmark_name.__contains__('M4'): dataset_for_eval = self._prepare_forecasting_data(dataset, benchmark_name, benchmark_params) + elif benchmark_name.__contains__('SKAB'): + dataset_for_eval = self._prepare_skab_data(dataset, benchmark_name, benchmark_params) else: dataset_for_eval = dataset for model_impl, model_name, finetune_strategy in zip(*benchmark_params['model_to_compare']): @@ -194,7 +204,6 @@ def evaluate_benchmark(self, benchmark_name, benchmark_params: dict): self.api_config['compute_config']['output_folder'] = f'./{benchmark_name}_{date}/{model_name}/{dataset}' result_dict = self.eval(dataset=dataset_for_eval, initial_assumption=model_impl, finetune=finetune_strategy) - _ = 1 def _prepare_forecasting_data(self, dataset, benchmark_name, benchmark_dict): prefix = dataset[0] @@ -205,3 +214,19 @@ def _prepare_forecasting_data(self, dataset, benchmark_name, benchmark_dict): self.api_config['industrial_config']['task_params']['forecast_length'] = horizon self.api_config['automl_config']['task_params']['forecast_length'] = horizon return dataset_for_eval + + def _prepare_skab_data(self, dataset, benchmark_name, benchmark_dict): + folder = benchmark_dict['metadata']['folder'] + path_to_result = EXAMPLES_DATA_PATH + f'/benchmark/detection/data/{folder}/{dataset}.csv' + df = pd.read_csv(path_to_result, index_col='datetime', sep=';', parse_dates=True) + train_idx = self.api_config['industrial_config']['strategy_params']['train_data_size'] + if isinstance(train_idx, str): + train_data = EXAMPLES_DATA_PATH + f'/benchmark/detection/data/{train_idx}/{train_idx}.csv' + train_data = pd.read_csv(train_data, index_col='datetime', sep=';', parse_dates=True) + label = np.array([0 for x in range(len(train_data))]) + dataset_for_eval = {'train_data': (train_data.values, label), + 'test_data': (df.iloc[:, :-2].values, df.iloc[:, -2].values)} + else: + dataset_for_eval = {'train_data': (df.iloc[:train_idx, :-2].values, df.iloc[:train_idx, -2].values), + 'test_data': (df.iloc[train_idx:, :-2].values, df.iloc[train_idx:, -2].values)} + return dataset_for_eval diff --git a/fedot_ind/core/architecture/preprocessing/data_convertor.py b/fedot_ind/core/architecture/preprocessing/data_convertor.py index 95ddd9922..636a85e6e 100644 --- a/fedot_ind/core/architecture/preprocessing/data_convertor.py +++ b/fedot_ind/core/architecture/preprocessing/data_convertor.py @@ -21,8 +21,8 @@ from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.architecture.settings.computational import default_device from fedot_ind.core.models.detection.anomaly.algorithms.arima_fault_detector import ARIMAFaultDetector -from fedot_ind.core.models.detection.anomaly.algorithms.convolutional_autoencoder_detector import \ - ConvolutionalAutoEncoderDetector +# from fedot_ind.core.models.detection.anomaly.algorithms.convolutional_autoencoder_detector import \ +# ConvolutionalAutoEncoderDetector from fedot_ind.core.models.detection.anomaly.algorithms.isolation_forest_detector import IsolationForestDetector from fedot_ind.core.models.detection.anomaly.algorithms.lstm_autoencoder_detector import LSTMAutoEncoderDetector from fedot_ind.core.models.detection.custom.stat_detector import StatisticalDetector @@ -439,7 +439,7 @@ def is_one_class_operation(self): OneClassSVM, StatisticalDetector, ARIMAFaultDetector, - ConvolutionalAutoEncoderDetector, + # ConvolutionalAutoEncoderDetector, LSTMAutoEncoderDetector, ) return isinstance(self.operation_implementation, detector_models) diff --git a/fedot_ind/core/metrics/pipeline.py b/fedot_ind/core/metrics/pipeline.py index 7aa0620c6..ecaf7d8b2 100644 --- a/fedot_ind/core/metrics/pipeline.py +++ b/fedot_ind/core/metrics/pipeline.py @@ -80,135 +80,3 @@ def industrial_evaluate_pipeline(self, graph: Pipeline) -> Fitness: # prepared_pipeline. return to_fitness(folds_metrics, self._objective.is_multi_objective) -# -# class IndustrialPipelineObjectiveEvaluate(ObjectiveEvaluate[Pipeline]): -# """ -# Evaluator of Objective that requires train and test data for metric evaluation. -# Its role is to prepare graph on train-data and then evaluate metrics on test data. -# -# :param objective: Objective for evaluating metrics on pipelines. -# :param data_producer: Producer of data folds, each fold is a tuple of (train_data, test_data). -# If it returns a single fold, it's effectively a hold-out validation. For many folds it's k-folds. -# :param time_constraint: Optional time constraint for pipeline.fit. -# :param validation_blocks: Number of validation blocks, optional, used only for time series validation. -# :param pipelines_cache: Cache manager for fitted models, optional. -# :param preprocessing_cache: Cache manager for optional preprocessing encoders and imputers, optional. -# :param eval_n_jobs: number of jobs used to evaluate the objective. -# :params do_unfit: unfit graph after evaluation -# """ -# -# def __init__(self, -# objective: Objective, -# data_producer: DataSource, -# time_constraint: Optional[timedelta] = None, -# validation_blocks: Optional[int] = None, -# pipelines_cache: Optional[OperationsCache] = None, -# preprocessing_cache: Optional[PreprocessingCache] = None, -# eval_n_jobs: int = 1, -# do_unfit: bool = True): -# super().__init__(objective, eval_n_jobs=eval_n_jobs) -# self._data_producer = data_producer -# self._time_constraint = time_constraint -# self._validation_blocks = validation_blocks -# self._pipelines_cache = pipelines_cache -# self._preprocessing_cache = preprocessing_cache -# self._log = default_log(self) -# self._do_unfit = do_unfit -# -# def evaluate(self, graph: Pipeline) -> Fitness: -# # Seems like a workaround for situation when logger is lost -# # when adapting and restoring it to/from OptGraph. -# graph.log = self._log -# -# graph_id = graph.root_node.descriptive_id -# self._log.debug(f'Pipeline {graph_id} fit started') -# -# folds_metrics = [] -# for fold_id, (train_data, test_data) in enumerate(self._data_producer()): -# try: -# prepared_pipeline = self.prepare_graph(graph, train_data, fold_id, self._eval_n_jobs) -# except Exception as ex: -# self._log.warning(f'Unsuccessful pipeline fit during fitness evaluation. ' -# f'Skipping the pipeline. Exception <{ex}> on {graph_id}') -# stack_trace = traceback.format_exc() -# save_pipeline_for_debug(graph, train_data, test_data, ex, stack_trace) -# break # if even one fold fails, the evaluation stops -# -# evaluated_fitness = self._objective(prepared_pipeline, -# reference_data=test_data, -# validation_blocks=self._validation_blocks) -# if evaluated_fitness.valid: -# folds_metrics.append(evaluated_fitness.values) -# else: -# self._log.warning(f'Invalid fitness after objective evaluation. ' -# f'Skipping the graph: {graph_id}', raise_if_test=True) -# if self._do_unfit: -# graph.unfit() -# if folds_metrics: -# folds_metrics = tuple(np.mean(folds_metrics, axis=0)) # averages for each metric over folds -# self._log.debug(f'Pipeline {graph_id} with evaluated metrics: {folds_metrics}') -# else: -# folds_metrics = None -# -# # prepared_pipeline. -# -# return to_fitness(folds_metrics, self._objective.is_multi_objective) -# -# def prepare_graph(self, graph: Pipeline, train_data: InputData, -# fold_id: Optional[int] = None, n_jobs: int = -1) -> Pipeline: -# """ -# Fit pipeline before metric evaluation can be performed. -# :param graph: pipeline for train & validation -# :param train_data: InputData for training pipeline -# :param fold_id: id of the fold in cross-validation, used for cache requests. -# :param n_jobs: number of parallel jobs for preparation -# """ -# if graph.is_fitted: -# # the expected behaviour for the remote evaluation -# return graph -# -# graph.unfit() -# -# # load preprocessing -# graph.try_load_from_cache(self._pipelines_cache, self._preprocessing_cache, fold_id) -# graph.fit( -# train_data, -# n_jobs=n_jobs, -# time_constraint=self._time_constraint -# ) -# -# if self._pipelines_cache is not None: -# self._pipelines_cache.save_pipeline(graph, fold_id) -# if self._preprocessing_cache is not None: -# self._preprocessing_cache.add_preprocessor(graph, fold_id) -# -# return graph -# -# def evaluate_intermediate_metrics(self, graph: Pipeline): -# """Evaluate intermediate metrics""" -# # Get the last fold -# last_fold = None -# fold_id = None -# for fold_id, last_fold in enumerate(self._data_producer()): -# pass -# # And so test only on the last fold -# train_data, test_data = last_fold -# graph.try_load_from_cache(self._pipelines_cache, self._preprocessing_cache, fold_id) -# for node in graph.nodes: -# if not isinstance(node.operation, Model): -# continue -# intermediate_graph = Pipeline(node, use_input_preprocessing=graph.use_input_preprocessing) -# intermediate_graph.fit( -# train_data, -# time_constraint=self._time_constraint, -# n_jobs=self._eval_n_jobs, -# ) -# intermediate_fitness = self._objective(intermediate_graph, -# reference_data=test_data, -# validation_blocks=self._validation_blocks) -# # saving only the most important first metric -# node.metadata.metric = intermediate_fitness.values[0] -# -# @property -# def input_data(self): -# return self._data_producer.args[0] diff --git a/fedot_ind/core/models/detection/anomaly/algorithms/convolutional_autoencoder_detector.py b/fedot_ind/core/models/detection/anomaly/algorithms/convolutional_autoencoder_detector.py index 7ee20d55c..e9621d864 100644 --- a/fedot_ind/core/models/detection/anomaly/algorithms/convolutional_autoencoder_detector.py +++ b/fedot_ind/core/models/detection/anomaly/algorithms/convolutional_autoencoder_detector.py @@ -4,11 +4,13 @@ from fedot.core.operations.operation_parameters import OperationParameters from torch import Tensor, cuda, device, no_grad from torch.nn import Conv1d, ConvTranspose1d, Module, MSELoss, Sequential, ReLU -from torch.optim import Adam +from torch.optim import Adam, lr_scheduler from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler +from tqdm import tqdm from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.detection.anomaly.algorithms.autoencoder_detector import AutoEncoderDetector +from fedot_ind.core.models.nn.network_modules.layers.special import EarlyStopping device = device("cuda:0" if cuda.is_available() else "cpu") @@ -32,11 +34,16 @@ def __init__(self, params: Optional[OperationParameters] = None): self.decoder_layers = params.get('num_decoder_layers', 2) self.latent_layer_params = params.get('latent_layer', 16) self.convolutional_params = params.get('convolutional_params', - dict(kernel_size=7, stride=2, padding=3)) + dict(kernel_size=3, stride=0, padding=0)) self.activation_func = params.get('act_func', ReLU) self.dropout_rate = params.get('dropout_rate', 0.5) + + def _init_model(self) -> tuple: self._build_encoder() self._build_decoder() + self.loss_fn = MSELoss() + self.optimizer = Adam(self.parameters(), lr=self.learning_rate) + return self.loss_fn, self.optimizer def _build_encoder(self): encoder_layer_dict = OrderedDict() @@ -74,20 +81,8 @@ def _build_decoder(self): in_channels = out_channels self.decoder = Sequential(decoder_layer_dict) - def forward(self, x): - x = self.encoder(x) - x = self.decoder(x) - return x - - def fit(self, - data, - epochs: int = 100, - batch_size: int = 32, - validation_split: float = 0.1): - dataset = TensorDataset(Tensor(data)) - optimizer = Adam(self.parameters(), lr=self.learning_rate) - criterion = MSELoss() - + def _create_dataloader(self, input_data, batch_size, validation_split): + dataset = TensorDataset(Tensor(input_data)) num_train = len(dataset) indices = list(range(num_train)) split = int(np.floor(validation_split * num_train)) @@ -100,24 +95,59 @@ def fit(self, train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler) valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler) + return train_loader, valid_loader + + def forward(self, x): + x = self.encoder(x) + x = self.decoder(x) + return x - for epoch in range(epochs): + def fit(self, + data, + epochs: int = 100, + batch_size: int = 32, + validation_split: float = 0.1): + self._init_model() + train_loader, valid_loader = self._create_dataloader(data, batch_size, validation_split) + train_steps, early_stopping, best_model, best_val_loss = max(1, len(train_loader)), EarlyStopping(), \ + None, float('inf') + scheduler = lr_scheduler.OneCycleLR(optimizer=self.optimizer, + steps_per_epoch=train_steps, + epochs=epochs, + max_lr=self.learning_rate) + + def train_one_batch(batch): + batch_x = batch[0] + self.optimizer.zero_grad() + outputs = self.forward(batch_x) + loss = self.loss_fn(outputs, batch_x) + loss.backward() + self.optimizer.step() + return loss.item() + + def val_one_epoch(batch): + inputs = batch[0] + output = self.forward(inputs) + loss = self.loss_fn(output, inputs) + return loss.data.item() * inputs.size(0) + + for epoch in tqdm(range(epochs)): self.train() - train_loss = 0.0 - for batch in train_loader: - optimizer.zero_grad() - outputs = self.forward(batch[0]) - loss = criterion(outputs, batch[0]) - loss.backward() - optimizer.step() - train_loss += loss.item() - self.eval() - valid_loss = 0.0 - with no_grad(): - for batch in valid_loader: - outputs = self.forward(batch[0]) - loss = criterion(outputs, batch[0]) - valid_loss += loss.item() + train_loss = list(map(lambda batch_tuple: train_one_batch(batch_tuple), train_loader)) + train_loss = np.average(train_loss) + if valid_loader is not None: + self.eval() + valid_loss = list(map(lambda batch_tuple: val_one_epoch(batch_tuple), valid_loader)) + valid_loss = np.average(valid_loss) + last_lr = scheduler.get_last_lr()[0] + if epoch % 25 == 0: + print( + "Epoch: {0}, Train Loss: {1} | Validation Loss: {2:.7f}".format(epoch + 1, train_loss, valid_loss)) + print('Updating learning rate to {}'.format(last_lr)) + if early_stopping.early_stop: + print("Early stopping") + break + return self def predict(self, data): self.eval() diff --git a/fedot_ind/core/models/detection/anomaly/algorithms/isolation_forest_detector.py b/fedot_ind/core/models/detection/anomaly/algorithms/isolation_forest_detector.py index d0d7ee65f..eae347f03 100644 --- a/fedot_ind/core/models/detection/anomaly/algorithms/isolation_forest_detector.py +++ b/fedot_ind/core/models/detection/anomaly/algorithms/isolation_forest_detector.py @@ -24,7 +24,7 @@ def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.random_state = self.params.get('random_state', 0) self.n_jobs = self.params.get('n_jobs', -1) - self.contamination = self.params.get('contamination', 0.0005) + self.contamination = self.params.get('contamination', 'auto') self.anomaly_threshold = self.params.get('anomaly_thr', 0.3) self.transformation_mode = 'full' diff --git a/fedot_ind/core/models/detection/anomaly_detector.py b/fedot_ind/core/models/detection/anomaly_detector.py index 486f71965..c5c4dea40 100644 --- a/fedot_ind/core/models/detection/anomaly_detector.py +++ b/fedot_ind/core/models/detection/anomaly_detector.py @@ -23,6 +23,9 @@ class AnomalyDetector(ModelImplementation): def __init__(self, params: Optional[OperationParameters] = None) -> None: super().__init__(params) self.length_of_detection_window = self.params.get('window_length', 10) + self.contamination = self.params.get('contamination', 'auto') + if isinstance(self.contamination, str): + self.offset = -0.5 self.transformation_mode = 'lagged' self.transformation_type = None @@ -71,7 +74,10 @@ def build_model(self): def _detect_anomaly_sample(self, score_matrix_row): outlier_score = score_matrix_row[0] - anomaly_sample = outlier_score < 0 and abs(outlier_score) > self.anomaly_threshold + if isinstance(self.contamination, str): + anomaly_sample = abs(outlier_score) > abs(self.anomaly_threshold) + abs(self.offset) + else: + anomaly_sample = outlier_score < 0 and abs(outlier_score) > self.anomaly_threshold return anomaly_sample def _convert_scores_to_labels(self, prob_matrix_row) -> int: diff --git a/fedot_ind/core/models/detection/subspaces/sst.py b/fedot_ind/core/models/detection/subspaces/sst.py index 829f1c9cf..b3f6becaf 100644 --- a/fedot_ind/core/models/detection/subspaces/sst.py +++ b/fedot_ind/core/models/detection/subspaces/sst.py @@ -8,115 +8,6 @@ from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix -# def _score_offline_2d_average(self, x_scaled: list) -> list: -# """Core implementation of offline score calculation with average features. FOR 2D or more D. -# -# Args: -# x_scaled: normalized time series if is_scaled False -# self.dynamic_mode: type of model to check differences in the sequence -# -# Returns: -# filtered_score: represented a list of values with 0 and 1 where 1 is an anomaly if view is True -# -# """ -# score_list = [] -# if not self.dynamic_mode: -# step = self.lag -# start_idx = step -# end_idx = len(x_scaled[0]) - step -# horm_hist = None -# average_features = self.average_features_2d(end_idx, start_idx, x_scaled) -# -# for current_index in range(start_idx, end_idx, step): -# current_features = self.current_features_2d(current_index, step, x_scaled) -# current_features = np.reshape(current_features, (len(x_scaled), 7)) -# if horm_hist is None: -# horm_hist = np.linalg.norm(distance_matrix(average_features.T, current_features.T), 2) -# score_list.append(np.linalg.norm(distance_matrix(average_features.T, current_features.T), 2)) -# else: -# start_idx = self.ts_window_length + self.lag -# end_idx = len(x_scaled[0]) - self.ts_window_length - 1 -# horm_hist = None -# x_history_arr = [] -# average_features = self.average_features_2d(end_idx, start_idx, x_scaled) -# x_history = None -# for ts_number in range(len(average_features)): -# x_history = self.spectrum_extractor.ts_vector_to_trajectory_matrix( -# timeseries=average_features[ts_number], -# K=self.ts_window_length - self.L + 1, -# L=len(average_features[ts_number])) -# x_history_arr.extend(x_history) -# -# for t in range(start_idx, end_idx, self.lag): # get Hankel matrix -# if horm_hist is None: -# horm_hist = np.linalg.norm(x_history, 1) -# -# current_features = self.current_features_2d(t, self.ts_window_length, x_scaled) -# current_features = current_features.reshape(current_features.shape[0], -# (current_features.shape[1] * current_features.shape[2])) -# -# x_test_arr = [] -# x_test = None -# for ts_number in range(len(current_features)): -# x_test = self.spectrum_extractor.ts_vector_to_trajectory_matrix( -# timeseries=current_features[ts_number], -# K=self.ts_window_length - self.L + 1, -# L=len(current_features[ts_number])) -# x_test_arr.extend(x_test) -# -# if self.n_components is None: -# self._n_components(x_test, x_history) -# -# score_list.append( -# self._sst_svd(x_test_arr, x_history_arr)) -# score_diff = np.diff(score_list) -# q_95 = np.quantile(score_diff, self.quantile_rate) -# filtered_score = score_diff -# self.n_components = None -# if self.view: -# filtered_score = list(map(lambda _x: 1 if _x > q_95 else 0, score_diff)) -# return filtered_score -# -# -# def _score_offline_2d(self, x_scaled: list = None) -> list: -# """Core implementation of offline score calculation. FOR 2D or more D. -# -# Args: -# x_scaled: normalized time series if is_scaled False -# self.dynamic_mode: type of model to check differences in the sequence -# -# Returns: -# filtered_score: represented a list of values with 0 and 1 where 1 is an anomaly if view is True -# -# """ -# norm_list_real = [] -# horm_hist = None -# if self.dynamic_mode: -# step = 1 * self.ts_window_length -# start_idx = step -# end_idx = len(x_scaled[0]) - step -# -# current_index = start_idx -# first_window = self._get_window_from_ts_complex(x_scaled, current_index, current_index + step) -# first_features = self._get_features_vector_from_window(first_window) -# first_features = np.asarray(first_features) -# first_features = np.reshape(first_features, (len(x_scaled), 7)) -# -# for current_index in range(start_idx, end_idx, step): -# current_window = self._get_window_from_ts_complex(x_scaled, current_index, current_index + step) -# current_features = self._get_features_vector_from_window(current_window) -# current_features = np.asarray(current_features) -# current_features = np.reshape(current_features, (len(x_scaled), 7)) -# if horm_hist is None: -# horm_hist = np.linalg.norm(distance_matrix(first_features.T, current_features.T), 2) -# norm_list_real.append(np.linalg.norm(distance_matrix(first_features.T, current_features.T), 2)) -# else: -# raise ValueError("Function dose not work when dynamic == False (FOR 2D or more D)") -# -# score_list = [horm_hist] + norm_list_real -# score_diff = np.diff(score_list) -# return score_diff - class SingularSpectrumTransformation: """SingularSpectrumTransformation class. diff --git a/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py b/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py index 719b9a3dd..a317a80a9 100644 --- a/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py +++ b/fedot_ind/core/optimizer/IndustrialEvoOptimizer.py @@ -141,7 +141,7 @@ def _evolve_population(self, def evolve_pop(population, evaluator): individuals_to_select = self.regularization(population, evaluator) new_population = self.reproducer.reproduce(individuals_to_select, evaluator) - if self.reproducer.stop_condition: + if self.reproducer.stop_condition or new_population is None: new_population = population else: self.log.message(f'Successful reproduction') diff --git a/fedot_ind/core/repository/data/default_operation_params.json b/fedot_ind/core/repository/data/default_operation_params.json index 312d87d00..ec5c9f274 100644 --- a/fedot_ind/core/repository/data/default_operation_params.json +++ b/fedot_ind/core/repository/data/default_operation_params.json @@ -429,9 +429,9 @@ }, "iforest_detector": { "window_length": 10, - "anomaly_thr": null, + "anomaly_thr": 0.1, "n_jobs": 2, - "contamination": 0.0005, + "contamination": "auto", "random_state": 42 }, "conv_ae_detector": { diff --git a/fedot_ind/core/repository/excluded.py b/fedot_ind/core/repository/excluded.py index cc3ee79db..56a352dbb 100644 --- a/fedot_ind/core/repository/excluded.py +++ b/fedot_ind/core/repository/excluded.py @@ -60,6 +60,7 @@ 'stl_arima', 'ets', 'cgru', + 'glm' # exclude weak regression models 'sgdr', 'treg', diff --git a/fedot_ind/core/repository/model_repository.py b/fedot_ind/core/repository/model_repository.py index 78cfcf54a..e15869cfb 100644 --- a/fedot_ind/core/repository/model_repository.py +++ b/fedot_ind/core/repository/model_repository.py @@ -52,7 +52,6 @@ from fedot_ind.core.models.nn.network_impl.tst import TSTModel from fedot_ind.core.models.pdl.pairwise_model import PairwiseDifferenceClassifier, PairwiseDifferenceRegressor from fedot_ind.core.models.ts_forecasting.eigen_autoreg import EigenAR -from fedot_ind.core.models.ts_forecasting.glm import GLMIndustrial from fedot_ind.core.operation.filtration.channel_filtration import ChannelCentroidFilter from fedot_ind.core.operation.transformation.basis.eigen_basis import EigenBasisImplementation from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation @@ -153,8 +152,7 @@ class AtomizedModel(Enum): 'ar': AutoRegImplementation, 'stl_arima': STLForecastARIMAImplementation, 'ets': ExpSmoothingImplementation, - # 'cgru': CGRUImplementation, - 'glm': GLMIndustrial, + # 'glm': GLMIndustrial, 'eigen_forecaster': EigenAR, # variational 'deepar_model': DeepAR, @@ -170,7 +168,11 @@ class AtomizedModel(Enum): 'exog_ts': ExogDataTransformationImplementation } - PRIMARY_FORECASTING_MODELS = ['ar', 'deepar_model', 'glm', 'eigen_forecaster'] + PRIMARY_FORECASTING_MODELS = ['ar', + 'deepar_model', + # 'glm', + 'eigen_forecaster' + ] ANOMALY_DETECTION_MODELS = { # for detection @@ -205,6 +207,9 @@ class AtomizedModel(Enum): 'xcm_model': XCModel, # variational models 'deepar_model': DeepAR, + # detection models + 'conv_ae_detector': ConvolutionalAutoEncoderDetector, + 'lstm_ae_detector': LSTMAutoEncoderDetector, # linear_dummy_model 'dummy': DummyOverComplicatedNeuralNetwork, # linear_dummy_model diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index 591661bd2..ac46cc740 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -98,10 +98,8 @@ "n_trend_blocks": {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 5, 1)]]}, "n_seasonality_blocks": {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 4, 1)]]}, "n_of_harmonics": {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 3, 1)]]}}, - 'ssa_forecaster': - {'window_size_method': {'hyperopt-dist': hp.choice, - 'sampling-scope': [['hac', 'dff']]}, - 'history_lookback': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(30, 300, 30)]]}}, + 'bagging': {'method': + {'hyperopt-dist': hp.choice, 'sampling-scope': [['max', 'min', 'mean', 'median']]}}, 'stat_detector': {'anomaly_thr': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(0.75, 0.99, 0.05))]}, 'window_length': {'hyperopt-dist': hp.choice, @@ -111,7 +109,7 @@ 'window_length': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(10, 35, 5))]}}, 'iforest_detector': - {'anomaly_thr': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(0.75, 0.99, 0.05))]}, + {'anomaly_thr': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(0.05, 0.5, 0.05))]}, 'window_length': {'hyperopt-dist': hp.choice, 'sampling-scope': [list(np.arange(10, 35, 5))]}}, 'conv_ae_detector': diff --git a/fedot_ind/tools/loader.py b/fedot_ind/tools/loader.py index dc530db5c..bcbae2423 100644 --- a/fedot_ind/tools/loader.py +++ b/fedot_ind/tools/loader.py @@ -46,9 +46,6 @@ def __init__(self, dataset_name: str, folder: Optional[str] = None, source_url: 'M5': M5.load, 'monash_tsf': load_dataset } - self.detection_data_source = { - 'SKAB': self.local_skab_load - } def load_forecast_data(self, folder: Optional[Union[Path, str]] = None): loader = self.forecast_data_source[folder] @@ -59,10 +56,6 @@ def load_forecast_data(self, folder: Optional[Union[Path, str]] = None): ts_df = ts_df.set_index('datetime') if 'datetime' in ts_df.columns else ts_df.set_index('ds') return ts_df, None - def load_detection_data(self, folder=None): - loader = self.detection_data_source['SKAB'] - return loader(directory=folder, group=self.dataset_name) - @staticmethod def local_m4_load(group: Optional[str] = None): path_to_result = EXAMPLES_DATA_PATH + '/forecasting/' @@ -70,37 +63,17 @@ def local_m4_load(group: Optional[str] = None): if result_cvs.__contains__(group): return pd.read_csv(Path(path_to_result, result_cvs)) - @staticmethod - def local_skab_load(directory: Union[Path, str] = 'other', group: Optional[str] = None): - path_to_result = EXAMPLES_DATA_PATH + f'/benchmark/detection/data/{directory}' - df = pd.read_csv(Path(path_to_result, f'{group}.csv'), - index_col='datetime', - sep=';', - parse_dates=True) - x_train, y_train = df.iloc[:120, :-2].values, df.iloc[:120, -2].values - x_test, y_test = df.iloc[120:, :-2].values, df.iloc[120:, -2].values - return (x_train, y_train), (x_test, y_test) - def _load_benchmark_data(self, specific_strategy: str): - bench = self.dataset_name['benchmark'] if specific_strategy == 'anomaly_detection': - self.dataset_name = self.dataset_name['dataset'] - train_data, test_data = self.load_detection_data(bench) - elif specific_strategy in ['ts_forecasting', 'forecasting_assumptions']: - train_data, test_data = self.load_forecast_data(bench) - target = train_data.values[-self.dataset_name['task_params']['forecast_length']:].flatten() - train_data = (train_data, target) - test_data = train_data + train_data, test_data = self.load_detection_data(self.dataset_name) return train_data, test_data def load_custom_data(self, specific_strategy: str): - custom_strategy = specific_strategy in ['anomaly_detection', 'ts_forecasting', 'forecasting_assumptions'] dict_dataset = isinstance(self.dataset_name, dict) if dict_dataset and 'train_data' in self.dataset_name.keys(): return self.dataset_name['train_data'], self.dataset_name['test_data'] - elif custom_strategy: - return self._load_benchmark_data(specific_strategy) - return None, None + else: + return None, None def load_data(self, shuffle: bool = True) -> tuple: """Load data for classification experiment locally or externally from UCR archive.