From 7a4843c571f4621254fd1d5d8fca053d80cba8eb Mon Sep 17 00:00:00 2001
From: "Vadim A. Potemkin"
Date: Thu, 4 Jul 2024 13:08:14 +0200
Subject: [PATCH] [FI-60] Unit and Integration tests improvement (#145)

---
 .github/workflows/pep8_action.yml             |   2 +-
 .github/workflows/poetry_unit_test.yml        |   4 +-
 .../specific_strategy/LoRa_example.py         |   2 +-
 .../federated_automl_example.py               |  17 +-
 .../mnist_lora_example.py                     |   7 +-
 fedot_ind/api/main.py                         |   2 +-
 fedot_ind/api/utils/industrial_strategy.py    |  81 ++++---
 .../pipelines/abstract_pipeline.py            |  91 --------
 .../postprocessing/optimisation_history.py    |   8 +-
 .../postprocessing/results_picker.py          |  20 +-
 .../visualisation/gradcam_vis.py              |  16 +-
 .../preprocessing/data_convertor.py           |  35 ++-
 .../architecture/settings/computational.py    |   5 +-
 .../architecture/settings/pipeline_factory.py |  34 +--
 .../core/ensemble/random_automl_forest.py     |  58 +++--
 .../models/automl/fedot_implementation.py     |  18 +-
 .../core/models/manifold/riemann_embeding.py  |  29 +--
 .../models/nn/network_impl/base_nn_model.py   |   4 +-
 .../models/nn/network_impl/chronos_tst.py     | 103 --------
 .../explainable_convolution_model.py          | 219 +++++++++---------
 .../models/nn/network_impl/mini_rocket.py     |   2 +-
 .../core/models/nn/network_impl/omni_scale.py |   3 +-
 .../core/models/nn/network_impl/patch_tst.py  |   9 +-
 .../models/nn/network_impl/transformer.py     |  31 +--
 fedot_ind/core/models/nn/network_impl/tst.py  |  39 ++--
 .../nn/network_modules/layers/conv_layers.py  |  61 +++--
 .../network_modules/layers/linear_layers.py   |  21 +-
 .../network_modules/layers/pooling_layers.py  |   5 +-
 .../core/models/nn/network_modules/losses.py  |   6 +-
 .../core/models/nn/network_modules/other.py   |  48 +---
 .../models/ts_forecasting/ssa_forecaster.py   |  23 +-
 .../operation/IndustrialCachableOperation.py  |  27 ++-
 .../transformation/basis/eigen_basis.py       |   8 +-
 .../core/repository/constanst_repository.py   |  71 +++---
 .../data/default_operation_params.json        |   8 +-
 fedot_ind/core/tuning/search_space.py         |  12 +-
 fedot_ind/tools/example_utils.py              |  52 +++++
 fedot_ind/tools/synthetic/ts_generator.py     |  38 +--
 pyproject.toml                                |  49 ++--
 .../test_ts_classification_advanced.py        |  54 ++---
 .../test_ts_classification_basic.py           |  11 +-
 .../ts_forecast/forecast_advanced.py          |   3 +
 .../api/utils/test_industrial_strategy.py     |  74 ++++++
 .../preprocessing/test_data_convertor.py      |  16 +-
 .../core/architecture/settings}/__init__.py   |   0
 .../settings/test_computational.py            |  18 ++
 .../settings/test_pipeline_factory.py         |  24 ++
 tests/unit/core/ensemble/test_RankEnsemble.py |  21 +-
 .../core/ensemble/test_kernel_ensemble.py     |  42 ++--
 tests/unit/core/models/model_impl/__init__.py |   0
 .../models/{ => model_impl}/test_deepar.py    |  10 +-
 .../core/models/model_impl/test_dummy_nn.py   |  10 +
 .../models/{ => model_impl}/test_inception.py |   0
 .../models/model_impl/test_mini_rocket.py     |  43 ++++
 .../core/models/model_impl/test_nbeats.py     |  38 +++
 .../core/models/model_impl/test_omni_scale.py |  35 +++
 .../core/models/model_impl/test_patch_tst.py  |  36 +++
 .../models/{ => model_impl}/test_resnet.py    |   0
 .../core/models/{ => model_impl}/test_tcn.py  |   0
 .../models/model_impl/test_transformer.py     |  26 +++
 .../core/models/model_impl/test_xc_model.py   |  37 +++
 .../core/models/network_modules/__init__.py   |   0
 .../models/network_modules/test_losses.py     | 218 +++++++++++++++++
 .../core/models/network_modules/test_other.py |  24 ++
 .../core/models/test_fedot_implementation.py  |  10 -
 .../core/models/test_quantile_extractor.py    |  13 +-
 .../unit/core/models/test_riemann_embeding.py |  96 ++++++++
 tests/unit/core/models/test_ssa.py            |  48 ++--
 .../test_fedot_automl_evaluation_strategy.py  |  72 +++---
 .../transformation/data/test_point_cloud.py   |  15 +-
 tests/unit/models/classification_pipelines.py | 122 ----------
 tests/unit/models/forecasting_pipelines.py    |   1 -
 .../models/test_classification_pipelines.py   |  15 ++
 tests/unit/tools/test_example_utils.py        |  29 +++
 tests/unit/tools/test_load_data.py            |  78 +++----
 75 files changed, 1477 insertions(+), 1030 deletions(-)
 delete mode 100644 fedot_ind/core/models/nn/network_impl/chronos_tst.py
 create mode 100644 tests/integration/ts_forecast/forecast_advanced.py
 create mode 100644 tests/unit/api/utils/test_industrial_strategy.py
 rename {fedot_ind/core/architecture/pipelines => tests/unit/core/architecture/settings}/__init__.py (100%)
 create mode 100644 tests/unit/core/architecture/settings/test_computational.py
 create mode 100644 tests/unit/core/architecture/settings/test_pipeline_factory.py
 create mode 100644 tests/unit/core/models/model_impl/__init__.py
 rename tests/unit/core/models/{ => model_impl}/test_deepar.py (94%)
 create mode 100644 tests/unit/core/models/model_impl/test_dummy_nn.py
 rename tests/unit/core/models/{ => model_impl}/test_inception.py (100%)
 create mode 100644 tests/unit/core/models/model_impl/test_mini_rocket.py
 create mode 100644 tests/unit/core/models/model_impl/test_nbeats.py
 create mode 100644 tests/unit/core/models/model_impl/test_omni_scale.py
 create mode 100644 tests/unit/core/models/model_impl/test_patch_tst.py
 rename tests/unit/core/models/{ => model_impl}/test_resnet.py (100%)
 rename tests/unit/core/models/{ => model_impl}/test_tcn.py (100%)
 create mode 100644 tests/unit/core/models/model_impl/test_transformer.py
 create mode 100644 tests/unit/core/models/model_impl/test_xc_model.py
 create mode 100644 tests/unit/core/models/network_modules/__init__.py
 create mode 100644 tests/unit/core/models/network_modules/test_losses.py
 create mode 100644 tests/unit/core/models/network_modules/test_other.py
 delete mode 100644 tests/unit/core/models/test_fedot_implementation.py
 create mode 100644 tests/unit/core/models/test_riemann_embeding.py
 delete mode 100644 tests/unit/models/classification_pipelines.py
 create mode 100644 tests/unit/models/test_classification_pipelines.py

diff --git a/.github/workflows/pep8_action.yml b/.github/workflows/pep8_action.yml
index 731bc5759..622353589 100644
--- a/.github/workflows/pep8_action.yml
+++ b/.github/workflows/pep8_action.yml
@@ -18,7 +18,7 @@ jobs:
         id: autopep8
         uses: peter-evans/autopep8@v2
         with:
-          args: --exit-code --recursive --in-place --aggressive --aggressive .
+          args: --exit-code --recursive --in-place --max-line-length 120 --experimental --aggressive --aggressive .
      - name: Commit autopep8 changes
        if: steps.autopep8.outputs.exit-code == 2
        run: |
diff --git a/.github/workflows/poetry_unit_test.yml b/.github/workflows/poetry_unit_test.yml
index 2591ebbf1..085c5005d 100644
--- a/.github/workflows/poetry_unit_test.yml
+++ b/.github/workflows/poetry_unit_test.yml
@@ -13,7 +13,7 @@ jobs:
     timeout-minutes: 30
     strategy:
       matrix:
-        python-version: [3.8, 3.9, '3.10']
+        python-version: [3.9, '3.10']
 
     steps:
       - uses: actions/checkout@v2
@@ -44,6 +44,6 @@ jobs:
       - name: Codecov-coverage
         uses: codecov/codecov-action@v4
         with:
-          token: ${{ secrets.CODECOV_TOKEN }}
           file: ./coverage.xml
+          token: ${{ secrets.CODECOV_TOKEN }}
           flags: unittests
diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py b/examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py
index 7b9b9e2fe..7a7a4255a 100644
--- a/examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py
+++ b/examples/automl_example/api_example/advanced_example/specific_strategy/LoRa_example.py
@@ -31,7 +31,7 @@
 
 api_config = dict(problem='classification',
                   metric='accuracy',
-                  timeout=15,
+                  timeout=0.1,
                   with_tuning=False,
                   industrial_strategy='lora_strategy',
                   industrial_strategy_params=lora_params,
diff --git a/examples/automl_example/api_example/advanced_example/specific_strategy/federated_automl_example.py b/examples/automl_example/api_example/advanced_example/specific_strategy/federated_automl_example.py
index b0b834a21..9b4d32d04 100644
--- a/examples/automl_example/api_example/advanced_example/specific_strategy/federated_automl_example.py
+++ b/examples/automl_example/api_example/advanced_example/specific_strategy/federated_automl_example.py
@@ -1,17 +1,22 @@
 from fedot_ind.api.main import FedotIndustrial
-from fedot_ind.tools.loader import DataLoader
+from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator
 
-dataset_name = 'Lightning7'
-metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
 api_config = dict(problem='classification',
                   metric='f1',
-                  timeout=5,
+                  timeout=0.1,
                   n_jobs=2,
                   industrial_strategy='federated_automl',
                   industrial_strategy_params={},
                   logging_level=20)
-train_data, test_data = DataLoader(dataset_name).load_data()
+
+# Huge synthetic dataset for experiment
+train_data, test_data = TimeSeriesDatasetsGenerator(num_samples=1800,
+                                                    task='classification',
+                                                    max_ts_len=50,
+                                                    binary=True,
+                                                    test_size=0.5,
+                                                    multivariate=False).generate_data()
+
 industrial = FedotIndustrial(**api_config)
 industrial.fit(train_data)
 predict = industrial.predict(test_data)
-_ = 1
diff --git a/examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py b/examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py
index 703044625..fb3d3e31d 100644
--- a/examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py
+++ b/examples/automl_example/api_example/computer_vision/image_classification/mnist_lora_example.py
@@ -252,9 +252,10 @@ def enable_disable_lora(enabled=True):
 # The original weights have been moved to net.linear1.parametrizations.weight.original
 # More info here:
 # https://pytorch.org/tutorials/intermediate/parametrizations.html#inspecting-a-parametrized-module
-assert torch.equal(docnn_model.linear1.weight, docnn_model.linear1.parametrizations.weight.original +
-                   (docnn_model.linear1.parametrizations.weight[0].lora_B @ docnn_model.linear1.parametrizations.weight[0].lora_A) *
-                   docnn_model.linear1.parametrizations.weight[0].scale)
+assert torch.equal(
+    docnn_model.linear1.weight, docnn_model.linear1.parametrizations.weight.original +
+    (docnn_model.linear1.parametrizations.weight[0].lora_B @ docnn_model.linear1.parametrizations.weight[0].lora_A) *
+    docnn_model.linear1.parametrizations.weight[0].scale)
 
 enable_disable_lora(enabled=False)
 # If we disable LoRA, the linear1.weight is the original one
diff --git a/fedot_ind/api/main.py b/fedot_ind/api/main.py
index 58286b900..30452db3b 100644
--- a/fedot_ind/api/main.py
+++ b/fedot_ind/api/main.py
@@ -146,7 +146,7 @@ def __init__(self, **kwargs):
             api_config=self.config_dict,
             industrial_strategy=self.industrial_strategy,
             industrial_strategy_params=self.industrial_strategy_params,
-            logger=self.logger)
+        )
 
     def __init_experiment_setup(self):
         self.logger.info('Initialising experiment setup')
diff --git a/fedot_ind/api/utils/industrial_strategy.py b/fedot_ind/api/utils/industrial_strategy.py
index a23340443..147ecf596 100644
--- a/fedot_ind/api/utils/industrial_strategy.py
+++ b/fedot_ind/api/utils/industrial_strategy.py
@@ -1,3 +1,4 @@
+import logging
 from copy import deepcopy
 
 import numpy as np
@@ -9,7 +10,7 @@ from fedot.core.repository.dataset_types import DataTypesEnum
 
 from fedot_ind.core.ensemble.kernel_ensemble import KernelEnsembler
-from fedot_ind.core.ensemble.random_automl_forest import RAFensembler
+from fedot_ind.core.ensemble.random_automl_forest import RAFEnsembler
 from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_WORKER_NUM, \
     FEDOT_WORKER_TIMEOUT_PARTITION, FEDOT_TUNING_METRICS, FEDOT_TUNER_STRATEGY, FEDOT_TS_FORECASTING_ASSUMPTIONS, \
     FEDOT_TASK
@@ -18,10 +19,10 @@
 
 class IndustrialStrategy:
-    def __init__(self, industrial_strategy_params,
+    def __init__(self,
+                 industrial_strategy_params,
                  industrial_strategy,
                  api_config,
-                 logger=None
                  ):
         self.industrial_strategy_params = industrial_strategy_params
         self.industrial_strategy = industrial_strategy
@@ -51,7 +52,7 @@ def __init__(self, industrial_strategy_params,
         self.ensemble_strategy = list(self.ensemble_strategy_dict.keys())
         self.random_label = None
         self.config_dict = api_config
-        self.logger = logger
+        self.logger = logging.getLogger('IndustrialStrategy')
         self.repo = IndustrialModels().setup_repository()
         self.kernel_ensembler = KernelEnsembler
         self.RAF_workers = None
@@ -62,33 +63,39 @@ def fit(self, input_data):
         return self.solver
 
     def predict(self, input_data, predict_mode):
-        return self.industrial_strategy_predict[self.industrial_strategy](
-            input_data, predict_mode)
+        return self.industrial_strategy_predict[self.industrial_strategy](input_data,
+                                                                          predict_mode)
 
     def _federated_strategy(self, input_data):
-        if input_data.features.shape[0] > BATCH_SIZE_FOR_FEDOT_WORKER:
+
+        n_samples = input_data.features.shape[0]
+        if n_samples > BATCH_SIZE_FOR_FEDOT_WORKER:
             self.logger.info('RAF algorithm was applied')
             if self.RAF_workers is None:
-                batch_size = FEDOT_WORKER_NUM
-            else:
-                batch_size = round(
-                    input_data.features.shape[0] /
-                    self.RAF_workers)
-            # batch_size = round(input_data.features.shape[0] / self.RAF_workers if self.RAF_workers
-            # is not None else FEDOT_WORKER_NUM)
-            batch_timeout = round(
-                self.config_dict['timeout'] /
-                FEDOT_WORKER_TIMEOUT_PARTITION)
-            self.config_dict['timeout'] = batch_timeout
-            self.logger.info(
-                f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}')
-            self.solver = RAFensembler(composing_params=self.config_dict,
+                self.RAF_workers = FEDOT_WORKER_NUM
+            batch_size = round(input_data.features.shape[0] / self.RAF_workers)
+
+            min_timeout = 0.5
+            selected_timeout = round(self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION)
+            self.config_dict['timeout'] = max(min_timeout, selected_timeout)
+
+            self.logger.info(f'Batch_size - {batch_size}. Number of batches - {self.RAF_workers}')
+
+            self.solver = RAFEnsembler(composing_params=self.config_dict,
                                        n_splits=self.RAF_workers,
                                        batch_size=batch_size)
             self.logger.info(
                 f'Number of AutoMl models in ensemble - {self.solver.n_splits}')
+
+            self.solver.fit(input_data)
+
+        else:
+            self.logger.info(f'RAF algorithm is not applicable: n_samples={n_samples} < {BATCH_SIZE_FOR_FEDOT_WORKER}. '
+                             f'FEDOT algorithm was applied')
+            self.solver = Fedot(**self.config_dict)
+            self.solver.fit(input_data)
+
     def _forecasting_strategy(self, input_data):
         self.logger.info('TS forecasting algorithm was applied')
         self.config_dict['timeout'] = round(self.config_dict['timeout'] / 3)
@@ -170,23 +177,27 @@ def _lora_strategy(self, input_data):
     def _federated_predict(self,
                            input_data,
                            mode: str = 'labels'):
-        self.predicted_branch_probs = [
-            x.predict(input_data).predict for x in self.solver.root_node.nodes_from]
-        self.predicted_branch_labels = [
-            np.argmax(x, axis=1) for x in self.predicted_branch_probs]
-        n_samples, n_channels, n_classes = self.predicted_branch_probs[0].shape[0], \
-            len(self.predicted_branch_probs), \
-            self.predicted_branch_probs[0].shape[1]
-        head_model = deepcopy(self.solver.root_node)
+        valid_nodes = self.solver.current_pipeline.root_node.nodes_from
+        self.predicted_branch_probs = [x.predict(input_data).predict for x in valid_nodes]
+
+        # reshape if binary
+        if len(self.predicted_branch_probs[0].shape) < 2:
+            self.predicted_branch_probs = [np.array([x, 1 - x]).T for x in self.predicted_branch_probs]
+
+        self.predicted_branch_labels = [np.argmax(x, axis=1) for x in self.predicted_branch_probs]
+
+        n_samples = self.predicted_branch_probs[0].shape[0]
+        n_channels = len(self.predicted_branch_probs)
+
+        head_model = deepcopy(self.solver.current_pipeline.root_node)
         head_model.nodes_from = []
-        input_data.features = np.hstack(
-            self.predicted_branch_labels).reshape(
-            n_samples, n_channels, 1)
-        head_predict = head_model.predict(self.predict_data).predict
+        input_data.features = np.hstack(self.predicted_branch_labels).reshape(n_samples,
+                                                                              n_channels,
+                                                                              1)
         if mode == 'labels':
-            return head_predict
+            return head_model.predict(input_data, 'labels').predict
         else:
-            return np.argmax(head_predict, axis=1)
+            return head_model.predict(input_data).predict
 
     def _forecasting_predict(self,
                              input_data,
diff --git a/fedot_ind/core/architecture/pipelines/abstract_pipeline.py b/fedot_ind/core/architecture/pipelines/abstract_pipeline.py
index 7ad4bdb1c..3e0028c2b 100644
--- a/fedot_ind/core/architecture/pipelines/abstract_pipeline.py
+++ b/fedot_ind/core/architecture/pipelines/abstract_pipeline.py
@@ -4,7 +4,6 @@
 from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
 from fedot_ind.tools.loader import DataLoader
-
 
 BENCHMARK = 'M4'
@@ -77,93 +76,3 @@ def evaluate_pipeline(self, node_list, dataset):
             predict_labels=predict.predict,
             predict_probs=predict_proba.predict,
             quality_metric=metric)
-
-# class AbstractPipelines:
-#     def __init__(self, train_data, test_data):
-#         self.train_features = train_data[0]
-#         self.train_target = train_data[1]
-#         self.test_features = test_data[0]
-#         self.test_target = test_data[1]
-#         self.basis = None
-#
-#         self.basis_dict = {i.name: i.value for i in BasisTransformations}
-#         self.model_dict = {i.name: i.value for i in MlModel}
-#         self.feature_generator_dict = {
-#             i.name: i.value for i in FeatureGenerator}
-#
-#         self.generators_with_matrix_input = ['topological',
-#                                              'wavelet',
-#                                              'recurrence',
-#                                              'quantile']
-#
-#     def _evaluate(self, classificator, train_features, test_features):
-#         fitted_model = classificator.fit(train_features=train_features,
-#                                          train_target=self.train_target)
-#         predicted_probs_labels = (classificator.predict(test_features=test_features),
-#                                   classificator.predict_proba(test_features=test_features))
-#         metrics = PerformanceAnalyzer().calculate_metrics(target=self.test_target,
-#                                                           predicted_labels=predicted_probs_labels[0],
-#                                                           predicted_probs=predicted_probs_labels[1])
-#         return fitted_model, metrics
-#
-#     def get_feature_generator(self, **kwargs):
-#         pass
-#
-#     def _get_feature_matrix(self, list_of_features, mode: str = 'Multi', **kwargs):
-#         if mode == '1D':
-#             feature_matrix = pd.concat(list_of_features, axis=0)
-#             if feature_matrix.shape[0] != len(list_of_features):
-#                 feature_matrix = pd.concat(list_of_features, axis=1)
-#         elif mode == 'MultiEnsemble':
-#             feature_matrix = []
-#             for i in range(len(list_of_features[0])):
-#                 _ = []
-#                 for feature_set in list_of_features:
-#                     _.append(feature_set[i])
-#                 feature_matrix.append(pd.concat(_, axis=0))
-#         elif mode == 'list_of_ts':
-#             feature_matrix = []
-#             for ts in list_of_features:
-#                 list_of_windows = []
-#                 for step in range(0, ts.shape[1], kwargs['window_length']):
-#                     list_of_windows.append(
-#                         ts[:, step:step + kwargs['window_length']])
-#                 feature_matrix.append(list_of_windows)
-#         else:
-#             feature_matrix = pd.concat(
-#                 [pd.concat(feature_set, axis=1) for feature_set in list_of_features], axis=0)
-#         return feature_matrix
-#
-#     def _init_pipeline_nodes(self, model_type: str = 'tsc', **kwargs):
-#         if 'feature_generator_type' not in kwargs.keys():
-#             generator = self.feature_generator_dict['quantile']
-#         else:
-#             generator = self.feature_generator_dict[kwargs['feature_generator_type']]
-#         try:
-#             feature_extractor = generator(params=kwargs['feature_hyperparams'])
-#
-#         except AttributeError:
-#             with open(PATH_TO_DEFAULT_PARAMS, 'r') as file:
-#                 _feature_gen_params = json.load(file)
-#             params = _feature_gen_params[f'{generator}_extractor']
-#             feature_extractor = generator(params)
-#         try:
-#             classificator = self.model_dict[model_type](model_hyperparams=kwargs['model_hyperparams'],
-#                                                         generator_name=kwargs['feature_generator_type'],
-#                                                         generator_runner=feature_extractor)
-#         except Exception:
-#             classificator = None
-#
-#         lambda_func_dict = {'create_list_of_ts': lambda x: ListMonad(*x.values.tolist()),
-#                             'scale': lambda time_series: pd.DataFrame(MinMaxScaler().fit_transform(
-#                                 time_series.to_numpy())),
-#                             'transpose_matrix': lambda time_series: time_series.T,
-#                             'reduce_basis': lambda x: x[:, 0] if x.shape[1] == 1 else x[:, kwargs['component']],
-#                             'extract_features': lambda x: feature_extractor.get_features(x),
-#                             'fit_model': lambda x: classificator.fit(train_features=x, train_target=self.train_target),
-#                             'predict': lambda x: ListMonad({'predicted_labels': classificator.predict(test_features=x),
-#                                                             'predicted_probs': classificator.predict_proba(
-#                                                                 test_features=x)})
-#                             }
-#
-#         return feature_extractor, classificator, lambda_func_dict
diff --git a/fedot_ind/core/architecture/postprocessing/optimisation_history.py b/fedot_ind/core/architecture/postprocessing/optimisation_history.py
index 325cc3ba2..c0f1ddd9b 100644
--- a/fedot_ind/core/architecture/postprocessing/optimisation_history.py
+++ b/fedot_ind/core/architecture/postprocessing/optimisation_history.py
@@ -54,12 +54,10 @@ def update_individual(cls: Type[Individual], json_obj: Dict[str, Any]):
 class RenameUnpickler(pickle.Unpickler):
     def find_class(self, module: str, name: str):
         renamed_module = module
-        changed_import_list = [
-            'fedot_ind.core.repository.initializer_industrial_models']
+        changed_import_list = ['fedot_ind.core.repository.initializer_industrial_models']
         if module in changed_import_list:
-            renamed_module = module.replace(
-                "golem.core.utilities",
-                "fedot_ind.core.repository.industrial_implementations.optimisation")
+            renamed_module = module.replace("golem.core.utilities",
+                                            "fedot_ind.core.repository.industrial_implementations.optimisation")
         return super(RenameUnpickler, self).find_class(renamed_module, name)
diff --git a/fedot_ind/core/architecture/postprocessing/results_picker.py b/fedot_ind/core/architecture/postprocessing/results_picker.py
index c3baadff9..535170439 100644
--- a/fedot_ind/core/architecture/postprocessing/results_picker.py
+++ b/fedot_ind/core/architecture/postprocessing/results_picker.py
@@ -61,22 +61,18 @@ def run(self, get_metrics_df: bool = False, add_info: bool = False):
         return proba_dict, metric_dict
 
     def _create_metrics_df(self, metric_dict):
-        columns = ['dataset', 'experiment']
         metrics_df = pd.DataFrame()
         for ds in metric_dict.keys():
             for exp in metric_dict[ds].keys():
                 metrics = metric_dict[ds][exp].to_dict(orient='records')[0]
-                metrics_df = metrics_df.append({'dataset': ds,
-                                                'experiment': exp,
-                                                'f1': metrics.get('f1'),
-                                                'roc_auc': metrics.get('roc_auc'),
-                                                'accuracy': metrics.get('accuracy'),
-                                                'precision': metrics.get('precision'),
-                                                'logloss': metrics.get('logloss')},
-                                               ignore_index=True)
-
-        metrics_df = pd.concat([metrics_df[['dataset', 'experiment']], metrics_df[[
-            col for col in metrics_df.columns if col not in columns]]], axis=1)
+                df = pd.DataFrame.from_dict({'dataset': ds,
+                                             'experiment': exp,
+                                             'f1': metrics.get('f1'),
+                                             'roc_auc': metrics.get('roc_auc'),
+                                             'accuracy': metrics.get('accuracy'),
+                                             'precision': metrics.get('precision'),
+                                             'logloss': metrics.get('logloss')}, orient='index').T
+                metrics_df = pd.concat([metrics_df, df], axis=0)
         return metrics_df
 
     def get_metrics_and_proba(self):
diff --git a/fedot_ind/core/architecture/postprocessing/visualisation/gradcam_vis.py b/fedot_ind/core/architecture/postprocessing/visualisation/gradcam_vis.py
index 7561d9d12..0e605117b 100644
--- a/fedot_ind/core/architecture/postprocessing/visualisation/gradcam_vis.py
+++ b/fedot_ind/core/architecture/postprocessing/visualisation/gradcam_vis.py
@@ -8,7 +8,7 @@
 def visualise_gradcam(att_maps,
                       figsize,
                       cmap, **kwargs):
-    matplotlib.use('TKagg')
+    # matplotlib.use('TKagg')
     if figsize is None:
         figsize = (12, 4)
@@ -18,16 +18,18 @@ def visualise_gradcam(att_maps,
     att_maps[1] = att_maps[1].mean(1)
 
     idx_plot = list(range(2 + len(median_sample)))
-    fig, axs = plt.subplots(
-        len(idx_plot), 1, figsize=figsize, sharex=True, **kwargs)
+    fig, axs = plt.subplots(len(idx_plot), 1,
+                            figsize=figsize,
+                            sharex=True,
+                            **kwargs)
     for idx, class_number in enumerate(median_sample):
         axs[idx].set_title(f'Median sample of {class_number}')
         sns.lineplot(median_sample[class_number].reshape(-1, 1), ax=axs[idx])
+
     axs[idx_plot[-2]].set_title('Observed Variables')
     axs[idx_plot[-1]].set_title('Attention by the time')
-    sns.heatmap(att_maps[0].numpy(), cbar=False,
-                cmap=cmap, ax=axs[idx_plot[-2]])
-    sns.heatmap(att_maps[1].numpy(), cbar=False,
-                cmap=cmap, ax=axs[idx_plot[-1]])
+
+    sns.heatmap(att_maps[0].numpy(), cbar=False, cmap=cmap, ax=axs[idx_plot[-2]])
+    sns.heatmap(att_maps[1].numpy(), cbar=False, cmap=cmap, ax=axs[idx_plot[-1]])
     fig.tight_layout()
     plt.show()
diff --git a/fedot_ind/core/architecture/preprocessing/data_convertor.py b/fedot_ind/core/architecture/preprocessing/data_convertor.py
index 7254b87a7..3104e3f6c 100644
--- a/fedot_ind/core/architecture/preprocessing/data_convertor.py
+++ b/fedot_ind/core/architecture/preprocessing/data_convertor.py
@@ -40,12 +40,11 @@ def __init__(self, ts):
             label_1 = max(ts.class_labels)
             label_0 = min(ts.class_labels)
             self.classes = ts.num_classes
-            if self.classes == 2 and label_1 != 1:
-                ts.target[ts.target == label_0] = 0
-                ts.target[ts.target == label_1] = 1
-            elif self.classes == 2 and label_0 != 0:
-                ts.target[ts.target == label_0] = 0
-                ts.target[ts.target == label_1] = 1
+
+            if self.classes == 2:
+                if label_0 != 0 or label_1 != 1:
+                    ts.target[ts.target == label_0] = 0
+                    ts.target[ts.target == label_1] = 1
             elif self.classes > 2 and label_0 == 1:
                 ts.target = ts.target - 1
         if type(min(ts.target)) is np.str_:
@@ -55,11 +54,8 @@ def __init__(self, ts):
             self.label_encoder = None
 
         try:
-            self.y = torch.nn.functional.one_hot(
-                torch.from_numpy(
-                    ts.target).long(),
-                num_classes=self.classes).to(
-                default_device()).squeeze(1)
+            self.y = torch.nn.functional.one_hot(torch.from_numpy(ts.target).long(),
+                                                 num_classes=self.classes).to(default_device()).squeeze(1)
         except Exception:
             self.y = torch.nn.functional.one_hot(torch.from_numpy(
                 ts.target).long()).to(default_device()).squeeze(1)
@@ -108,10 +104,8 @@ def __init_input_data(self, features: pd.DataFrame,
                      'regression': Task(TaskTypesEnum.regression)}
         if is_multivariate_data:
             input_data = InputData(idx=np.arange(len(features)),
-                                   features=np.array(
-                                       features.values.tolist()).astype(float),
-                                   target=target.astype(
-                                       float).reshape(-1, 1),
+                                   features=np.array(features.values.tolist()).astype(float),
+                                   target=target.astype(float).reshape(-1, 1),
                                    task=task_dict[task],
                                    data_type=MULTI_ARRAY)
         else:
@@ -168,10 +162,10 @@ def convert_input_to_output(self):
 
     def convert_to_industrial_composing_format(self, mode):
         if mode == 'one_dimensional':
-            new_features, new_target = [
-                array.reshape(array.shape[0], array.shape[1] * array.shape[2])
-                if array is not None and len(array.shape) > 2 else array
-                for array in [self.input_data.features, self.input_data.target]]
+            new_features, new_target = [array.reshape(array.shape[0], array.shape[1] * array.shape[2])
+                                        if array is not None and len(array.shape) > 2
+                                        else array
+                                        for array in [self.input_data.features, self.input_data.target]]
             # if new_features.shape[0] != new_target.shape[0]:
             #     min_samples = min(new_features.shape[0], new_target.shape[0])
             #     new_features, new_target = new_features[:min_samples], new_target[:min_samples]
@@ -562,7 +556,8 @@ def is_tuple(self):
     @property
     def is_torchvision_dataset(self):
         if self.is_tuple:
-            return self.data[1] == 'torchvision_dataset'
+            return np.all(self.data[1] == 'torchvision_dataset')
+            # return self.data[1] == 'torchvision_dataset'
         else:
             return False
diff --git a/fedot_ind/core/architecture/settings/computational.py b/fedot_ind/core/architecture/settings/computational.py
index e0ab169cf..f22caa9be 100644
--- a/fedot_ind/core/architecture/settings/computational.py
+++ b/fedot_ind/core/architecture/settings/computational.py
@@ -62,7 +62,7 @@ def global_imports(object_name: str,
     globals()[short_name] = getattr(context_module, object_name)
 
 
-def default_device(device_type: str = 'CUDA'):
+def default_device(device_type: str = 'cpu'):
     """Return or set default device. Modified from fastai.
 
     Args:
@@ -73,7 +73,8 @@ def default_device(device_type: str = 'cpu'):
     """
     if device_type == 'CUDA':
-        device_type = defaults.use_cuda
+        defaults.use_cuda = True
+        return torch.device("cuda")
     elif device_type == 'cpu':
         defaults.use_cuda = False
         return torch.device("cpu")
diff --git a/fedot_ind/core/architecture/settings/pipeline_factory.py b/fedot_ind/core/architecture/settings/pipeline_factory.py
index f5104b942..e11f305f1 100644
--- a/fedot_ind/core/architecture/settings/pipeline_factory.py
+++ b/fedot_ind/core/architecture/settings/pipeline_factory.py
@@ -19,7 +19,6 @@ class BasisTransformations(Enum):
 
 class FeatureGenerator(Enum):
     quantile = QuantileExtractor
-    # signal = SignalExtractor
     topological = TopologicalExtractor
     recurrence = RecurrenceExtractor
 
@@ -64,33 +63,18 @@ class KernelFeatureGenerator(Enum):
                  ]
     wavelet = [
         {'feature_generator_type': 'wavelet',
-         'feature_hyperparams': {
-             'wavelet': "mexh",
-             'n_components': 2
-         }}  # ,
-        # {'feature_generator_type': 'wavelet',
-        #  'feature_hyperparams': {
-        #      'wavelet': "haar",
-        #      'n_components': 2
-        #  }}
-        ,
-        # {'feature_generator_type': 'wavelet',
-        #  'feature_hyperparams': {
-        #      'wavelet': "dmey",
-        #      'n_components': 2
-        #  }
-        # },
+         'feature_hyperparams': {'wavelet': "mexh",
+                                 'n_components': 2}
+         },
+
         {'feature_generator_type': 'wavelet',
-         'feature_hyperparams': {
-             'wavelet': "gaus3",
-             'n_components': 2
-         }
+         'feature_hyperparams': {'wavelet': "gaus3",
+                                 'n_components': 2}
          },
+
         {'feature_generator_type': 'wavelet',
-         'feature_hyperparams': {
-             'wavelet': "morl",
-             'n_components': 2
-         }
+         'feature_hyperparams': {'wavelet': "morl",
+                                 'n_components': 2}
          }
     ]
     recurrence = []
diff --git a/fedot_ind/core/ensemble/random_automl_forest.py b/fedot_ind/core/ensemble/random_automl_forest.py
index 9aa5a556b..5b87ecd67 100644
--- a/fedot_ind/core/ensemble/random_automl_forest.py
+++ b/fedot_ind/core/ensemble/random_automl_forest.py
@@ -11,56 +11,52 @@
 from fedot_ind.core.repository.model_repository import SKLEARN_CLF_MODELS, SKLEARN_REG_MODELS
 
 
-class RAFensembler:
+class RAFEnsembler:
     """Class for ensemble of random automl forest
 
     Args:
         composing_params: dict with parameters for ensemble
-        ensemble_type: type of ensemble
         n_splits: number of splits for ensemble
         batch_size: size of batch for ensemble
 
    """
 
-    def __init__(self, composing_params,
-                 ensemble_type: str = 'random_automl_forest',
+    def __init__(self,
+                 composing_params,
                  n_splits: int = None,
                  batch_size: int = 1000):
         self.current_pipeline = None
-        ensemble_dict = {'random_automl_forest': self._raf_ensemble}
 
         self.problem = composing_params['problem']
         self.task = FEDOT_TASK[composing_params['problem']]
         self.atomized_automl = FEDOT_ATOMIZE_OPERATION[composing_params['problem']]
         self.head = FEDOT_HEAD_ENSEMBLE[composing_params['problem']]
-        self.ensemble_method = ensemble_dict[ensemble_type]
+        self.ensemble_method = self._raf_ensemble
         self.atomized_automl_params = composing_params
-
+        self.n_splits = n_splits
         self.batch_size = batch_size
-        if n_splits is None:
-            self.n_splits = n_splits
-        else:
-            self.n_splits = n_splits
 
     def _decompose_pipeline(self):
         batch_pipe = [automl_branch.fitted_operation.model.current_pipeline.root_node for automl_branch in
-                      self.current_pipeline.nodes if
-                      automl_branch.name in FEDOT_ATOMIZE_OPERATION.values()]
+                      self.current_pipeline.nodes if automl_branch.name in FEDOT_ATOMIZE_OPERATION.values()]
         self.ensemble_branches = batch_pipe
         self.ensemble_head = self.current_pipeline.nodes[0]
         self.ensemble_head.nodes_from = self.ensemble_branches
-        composed = Pipeline(self.ensemble_head)
-        self.current_pipeline = composed
+        self.current_pipeline = Pipeline(self.ensemble_head)
 
     def fit(self, train_data):
         if self.n_splits is None:
-            self.n_splits = round(
-                train_data.features.shape[0] / self.batch_size)
-        new_features = np.array_split(train_data.features, self.n_splits)
-        new_target = np.array_split(train_data.target, self.n_splits)
-        self.current_pipeline = self.ensemble_method(
-            new_features, new_target, n_splits=self.n_splits)
+            self.n_splits = round(train_data.features.shape[0] / self.batch_size)
+
+        new_features = np.array_split(train_data.features,
+                                      self.n_splits)
+        new_target = np.array_split(train_data.target,
+                                    self.n_splits)
+
+        self.current_pipeline = self.ensemble_method(new_features,
+                                                     new_target,
+                                                     n_splits=self.n_splits)
         self._decompose_pipeline()
 
     def predict(self, test_data, output_mode: str = 'labels'):
@@ -69,29 +65,29 @@ def predict(self, test_data, output_mode: str = 'labels'):
     def _raf_ensemble(self, features, target, n_splits):
         raf_ensemble = PipelineBuilder()
         data_dict = {}
-        for i, data_fold_features, data_fold_target in zip(
-                range(n_splits), features, target):
+        for i, data_fold_features, data_fold_target in zip(range(n_splits), features, target):
+
             train_fold = InputData(idx=np.arange(0, len(data_fold_features)),
                                    features=data_fold_features,
                                    target=data_fold_target,
                                    task=self.task,
                                    data_type=DataTypesEnum.image)
-            raf_ensemble.add_node(
-                f'data_source_img/{i}',
-                branch_idx=i).add_node(
-                self.atomized_automl,
-                params=self.atomized_automl_params,
-                branch_idx=i)
+            raf_ensemble.add_node(operation_type=f'data_source_img/{i}',
+                                  branch_idx=i)\
+                .add_node(self.atomized_automl,
+                          params=self.atomized_automl_params,
+                          branch_idx=i)
+
             data_dict.update({f'data_source_img/{i}': train_fold})
         train_multimodal = MultiModalData(data_dict)
 
         head_automl_params = deepcopy(self.atomized_automl_params)
+
         head_automl_params['available_operations'] = [
             operation for operation in head_automl_params['available_operations'] if operation in list(
                 SKLEARN_CLF_MODELS.keys()) or operation in list(
                 SKLEARN_REG_MODELS.keys())]
-        # head_automl_params['initial_assumption'] = FEDOT_ENSEMBLE_ASSUMPTIONS[self.atomized_automl_params[
-        # 'problem']].build()
+
         raf_ensemble = raf_ensemble.join_branches(self.head).build()
         raf_ensemble.fit(input_data=train_multimodal)
         return raf_ensemble
diff --git a/fedot_ind/core/models/automl/fedot_implementation.py b/fedot_ind/core/models/automl/fedot_implementation.py
index 8808025bb..e9b0b9c67 100644
--- a/fedot_ind/core/models/automl/fedot_implementation.py
+++ b/fedot_ind/core/models/automl/fedot_implementation.py
@@ -12,8 +12,7 @@
 class FedotClassificationImplementation(ModelImplementation):
     """Implementation of Fedot as classification pipeline node for AutoML.
""" - AVAILABLE_OPERATIONS = default_industrial_availiable_operation( - 'classification') + AVAILABLE_OPERATIONS = default_industrial_availiable_operation('classification') def __init__(self, params: Optional[OperationParameters] = None): if not params: @@ -21,7 +20,7 @@ def __init__(self, params: Optional[OperationParameters] = None): else: params = params.to_dict() if 'available_operations' not in params.keys(): - params.update({'available_operations': self.AVAILABLE_OPERATIONS}) + params.update(**{'available_operations': self.AVAILABLE_OPERATIONS}) self.model = Fedot(**params) super(FedotClassificationImplementation, self).__init__() @@ -29,20 +28,15 @@ def fit(self, input_data: InputData): self.model.fit(input_data) return self - def predict( - self, - input_data: InputData, - output_mode='default') -> OutputData: - return self.model.current_pipeline.predict( - input_data, output_mode=output_mode) + def predict(self, input_data: InputData, output_mode='default') -> OutputData: + return self.model.current_pipeline.predict(input_data, output_mode=output_mode) class FedotRegressionImplementation(ModelImplementation): """Implementation of Fedot as regression pipeline node for AutoML. """ - AVAILABLE_OPERATIONS = default_industrial_availiable_operation( - 'regression') + AVAILABLE_OPERATIONS = default_industrial_availiable_operation('regression') def __init__(self, params: Optional[OperationParameters] = None): if not params: @@ -50,7 +44,7 @@ def __init__(self, params: Optional[OperationParameters] = None): else: params = params.to_dict() if 'available_operations' not in params.keys(): - params.update({'available_operations': self.AVAILABLE_OPERATIONS}) + params.update(**{'available_operations': self.AVAILABLE_OPERATIONS}) self.model = Fedot(**params) super(FedotRegressionImplementation, self).__init__() diff --git a/fedot_ind/core/models/manifold/riemann_embeding.py b/fedot_ind/core/models/manifold/riemann_embeding.py index c5dceba93..5fd918a7e 100644 --- a/fedot_ind/core/models/manifold/riemann_embeding.py +++ b/fedot_ind/core/models/manifold/riemann_embeding.py @@ -1,7 +1,7 @@ from typing import Optional import numpy as np -from fedot.core.data.data import InputData +from fedot.core.data.data import InputData, OutputData from fedot.core.operations.operation_parameters import OperationParameters from pyriemann.estimation import Covariances, Shrinkage from pyriemann.tangentspace import TangentSpace @@ -67,37 +67,38 @@ def _init_spaces(self): self.tangent_space = TangentSpace(metric=self.tangent_metric) self.shrinkage = Shrinkage() - def extract_riemann_features(self, input_data: InputData) -> InputData: + def extract_riemann_features(self, input_data: InputData) -> np.ndarray: if not self.fit_stage: SPD = self.spd_space.transform(input_data.features) SPD = self.shrinkage.transform(SPD) ref_point = self.tangent_space.transform(SPD) else: - SPD = self.spd_space.fit_transform( - input_data.features, input_data.target) + SPD = self.spd_space.fit_transform(input_data.features, + input_data.target) SPD = self.shrinkage.fit_transform(SPD) ref_point = self.tangent_space.fit_transform(SPD) self.fit_stage = False self.classes_ = np.unique(input_data.target) return ref_point - def extract_centroid_distance(self, input_data: InputData): + def extract_centroid_distance(self, input_data: InputData) -> np.ndarray: input_data.target = input_data.target.astype(int) if self.fit_stage: - SPD = self.spd_space.fit_transform( - input_data.features, input_data.target) + SPD = 
self.spd_space.fit_transform(input_data.features, + input_data.target) SPD = self.shrinkage.transform(SPD) else: SPD = self.spd_space.transform(input_data.features) SPD = self.shrinkage.fit_transform(SPD) - self.covmeans_ = [mean_covariance(SPD[np.array(input_data.target == ll).flatten( - )], metric=self.spd_metric) for ll in self.classes_] + self.covmeans_ = [mean_covariance(SPD[np.array(input_data.target == ll).flatten()], + metric=self.spd_metric) for ll in self.classes_] n_centroids = len(self.covmeans_) - dist = [distance(SPD, self.covmeans_[m], self.tangent_metric) - for m in range(n_centroids)] + dist = [distance(SPD, + self.covmeans_[m], + self.tangent_metric) for m in range(n_centroids)] dist = np.concatenate(dist, axis=1) feature_matrix = softmax(-dist ** 2) return feature_matrix @@ -105,11 +106,11 @@ def extract_centroid_distance(self, input_data: InputData): def _ensemble_features(self, input_data: InputData): tangent_features = self.extract_riemann_features(input_data) dist_features = self.extract_centroid_distance(input_data) - feature_matrix = np.concatenate( - [tangent_features, dist_features], axis=1) + feature_matrix = np.concatenate([tangent_features, dist_features], + axis=1) return feature_matrix - def _transform(self, input_data: InputData) -> np.array: + def _transform(self, input_data: InputData) -> OutputData: """ Method for feature generation for all series """ diff --git a/fedot_ind/core/models/nn/network_impl/base_nn_model.py b/fedot_ind/core/models/nn/network_impl/base_nn_model.py index a29fc694b..c5f0c6d5e 100644 --- a/fedot_ind/core/models/nn/network_impl/base_nn_model.py +++ b/fedot_ind/core/models/nn/network_impl/base_nn_model.py @@ -63,7 +63,7 @@ def fit(self, input_data: InputData): self._save_and_clear_cache() @convert_to_4d_torch_array - def _fit_model(self, ts: InputData): + def _fit_model(self, ts: InputData, split_data: bool = True): loss_fn, optimizer = self._init_model(ts) train_loader, val_loader = self._prepare_data(ts, split_data=True) @@ -76,7 +76,7 @@ def _fit_model(self, ts: InputData): ) def _init_model(self, ts) -> tuple: - NotImplementedError() + raise NotImplementedError() def _prepare_data(self, ts, split_data: bool = True): diff --git a/fedot_ind/core/models/nn/network_impl/chronos_tst.py b/fedot_ind/core/models/nn/network_impl/chronos_tst.py deleted file mode 100644 index b9e0683c9..000000000 --- a/fedot_ind/core/models/nn/network_impl/chronos_tst.py +++ /dev/null @@ -1,103 +0,0 @@ - -# from chronos import ChronosPipeline - - -class ChronosExtractor: - pass - - -# def chronos_small(input_dim: int = 1, -# seq_len: int = 1, -# num_features: int = 100): -# model = ChronosPipeline.from_pretrained("amazon/chronos-t5-small", -# device_map='cpu', -# torch_dtype=torch.bfloat16) -# chronos_encoder = model.model.model.encoder -# return chronos_encoder - - -# class ChronosExtractor(BaseExtractor): -# """Feature space generator based on Chronos model. -# -# Attributes: -# num_features: int, the number of features. 
-#
-#     Example:
-#         To use this operation you can create pipeline as follows::
-#             from fedot.core.pipelines.pipeline_builder import PipelineBuilder
-#             from examples.fedot.fedot_ex import init_input_data
-#             from fedot_ind.tools.loader import DataLoader
-#             from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
-#
-#             train_data, test_data = DataLoader(dataset_name='Ham').load_data()
-#             with IndustrialModels():
-#                 pipeline = PipelineBuilder().add_node('chronos_extractor')\
-#                     .add_node('rf').build()
-#                 input_data = init_input_data(train_data[0], train_data[1])
-#                 pipeline.fit(input_data)
-#                 features = pipeline.predict(input_data)
-#                 print(features)
-#
-#     """
-#
-#     def __init__(self, params: Optional[OperationParameters] = None):
-#         super().__init__(params)
-#         self.num_features = params.get('num_features', 10000)
-#
-#     def __repr__(self):
-#         return 'TransformerFeatureSpace'
-#
-#     def _save_and_clear_cache(self, model_list: list):
-#         del model_list
-#         with torch.no_grad():
-#             torch.cuda.empty_cache()
-#
-#     def _generate_features_from_ts(self, ts: np.array, mode: str = 'multivariate'):
-#
-#         if ts.shape[1] > 1 and mode == 'chanel_independent':
-#             chrono_model = chronos_small(input_dim=1,
-#                                          seq_len=ts.shape[2],
-#                                          num_features=self.num_features)
-#
-#             n_dim = range(ts.shape[1])
-#             ts_converted = [ts[:, i, :] for i in n_dim]
-#             ts_converted = [x.reshape(x.shape[0], 1, x.shape[1])
-#                             for x in ts_converted]
-#             model_list = [chrono_model for i in n_dim]
-#         else:
-#             chrono_model = chronos_small(input_dim=ts.shape[1],
-#                                          seq_len=ts.shape[2],
-#                                          num_features=self.num_features)
-#
-#             ts_converted = [ts.swapaxes(1, 2)]
-#             model_list = [chrono_model]
-#
-#         features = [chrono_model(inputs_embeds=torch.Tensor(data).to(default_device('cpu')).to(torch.long))
-#                     for model, data in zip(model_list, ts_converted)]
-#
-#         chrono_features = [feature_by_dim.swapaxes(1, 2) for feature_by_dim in features]
-#         minirocket_features = np.concatenate(chrono_features, axis=1)
-#         minirocket_features = OutputData(idx=np.arange(minirocket_features.shape[2]),
-#                                          task=self.task,
-#                                          predict=minirocket_features,
-#                                          data_type=DataTypesEnum.image)
-#         self._save_and_clear_cache(model_list)
-#         return minirocket_features
-#
-#     def generate_chronos_features(self, ts: np.array) -> InputData:
-#         return self._generate_features_from_ts(ts)
-#
-#     def generate_features_from_ts(self, ts_data: np.array,
-#                                   dataset_name: str = None):
-#         return self.generate_chronos_features(ts=ts_data)
-#
-#     def _transform(self,
-#                    input_data: InputData) -> np.array:
-#         """
-#         Method for feature generation for all series
-#         """
-#         self.task = input_data.task
-#         self.task.task_params = self.__repr__()
-#         feature_matrix = self.generate_features_from_ts(input_data.features)
-#         feature_matrix.predict = self._clean_predict(feature_matrix.predict)
-#         return feature_matrix
diff --git a/fedot_ind/core/models/nn/network_impl/explainable_convolution_model.py b/fedot_ind/core/models/nn/network_impl/explainable_convolution_model.py
index 614b8d360..fc13825dc 100644
--- a/fedot_ind/core/models/nn/network_impl/explainable_convolution_model.py
+++ b/fedot_ind/core/models/nn/network_impl/explainable_convolution_model.py
@@ -1,10 +1,11 @@
+from typing import Optional
+
 import torch
 from fastai.callback.hook import *
 from fastai.layers import BatchNorm, LinBnDrop, SigmoidRange
 from fastai.torch_core import Module
 from fedot.core.operations.operation_parameters import OperationParameters
 from torch import nn, optim
-from typing import Optional
 
 from fedot_ind.core.architecture.abstraction.decorators import convert_inputdata_to_torch_dataset
 from fedot_ind.core.architecture.postprocessing.visualisation.gradcam_vis import visualise_gradcam
@@ -12,13 +13,11 @@
 from fedot_ind.core.architecture.settings.computational import default_device
 from fedot_ind.core.models.nn.network_impl.base_nn_model import BaseNeuralModel
 from fedot_ind.core.models.nn.network_modules.layers.conv_layers import Conv1d, Conv2d
-from fedot_ind.core.models.nn.network_modules.layers.linear_layers import Concat, Reshape, Squeeze, Unsqueeze
+from fedot_ind.core.models.nn.network_modules.layers.linear_layers import Concat, Reshape, Squeeze, Flatten
 from fedot_ind.core.models.nn.network_modules.layers.pooling_layers import GACP1d, GAP1d
 
 
-def torch_slice_by_dim(t,
-                       index, dim=-1,
-                       **kwargs):
+def torch_slice_by_dim(t, index, dim=-1, **kwargs):
     if not isinstance(index, torch.Tensor):
         index = torch.Tensor(index)
     assert t.ndim == index.ndim, "t and index must have the same ndim"
@@ -67,41 +66,50 @@ def __init__(self,
                  **kwargs):
 
         window_size = int(round(seq_len * window_perc, 0))
-        self.conv2dblock = nn.Sequential(
-            *[Unsqueeze(1),
-              Conv2d(1, number_filters, kernel_size=(1, window_size),
-                     padding='same'),
-              BatchNorm(number_filters),
-              nn.ReLU()])
-        self.conv2d1x1block = nn.Sequential(
-            *[nn.Conv2d(number_filters, 1, kernel_size=1), nn.ReLU(), Squeeze(1)])
-        self.conv1dblock = nn.Sequential(
-            *[Conv1d(
-                input_dim,
-                number_filters,
-                kernel_size=window_size,
-                padding='same'),
-              BatchNorm(number_filters, ndim=1),
-              nn.ReLU()])
-        self.conv1d1x1block = nn.Sequential(*[nn.Conv1d(
-            number_filters,
-            1,
-            kernel_size=1),
-            nn.ReLU()])
+
+        self.conv2dblock = nn.Sequential(*[Conv2d(input_dim,
+                                                  number_filters,
+                                                  kernel_size=(1, window_size),
+                                                  padding='same'),
+                                           BatchNorm(number_filters),
+                                           nn.ReLU()])
+
+        self.conv2d1x1block = nn.Sequential(*[nn.Conv2d(in_channels=number_filters,
+                                                        out_channels=1,
+                                                        kernel_size=1),
+                                              nn.ReLU(),
+                                              Squeeze(1)])
+
+        self.conv1dblock = nn.Sequential(*[Flatten(),
+                                           Conv1d(input_dim,
+                                                  number_filters,
+                                                  kernel_size=window_size,
+                                                  padding='same'),
+                                           BatchNorm(number_filters,
+                                                     ndim=1),
+                                           nn.ReLU()])
+
+        self.conv1d1x1block = nn.Sequential(*[nn.Conv1d(number_filters,
+                                                        1,
+                                                        kernel_size=1),
+                                              nn.ReLU()])
+
         self.flatten = Flatten()
         self.concat = Concat()
-        self.conv1d = nn.Sequential(
-            *[Conv1d(input_dim + 1,
-                     number_filters,
-                     kernel_size=window_size,
-                     padding='same'),
-              BatchNorm(number_filters, ndim=1), nn.ReLU()])
+        self.conv1d = nn.Sequential(*[Conv1d(input_dim - 1,
+                                             number_filters,
+                                             kernel_size=window_size,
+                                             padding='same'),
+                                      BatchNorm(number_filters, ndim=1),
+                                      nn.ReLU()])
 
         self.head_number_filters = number_filters
         self.output_dim = output_dim
         self.seq_len = seq_len
         if custom_head:
-            self.head = custom_head(
-                self.head_number_filters, output_dim, seq_len, **kwargs)
+            self.head = custom_head(self.head_number_filters,
+                                    output_dim,
+                                    seq_len,
+                                    **kwargs)
         else:
             self.head = create_head(
                 self.head_number_filters,
@@ -113,6 +121,56 @@ def __init__(self,
                 batch_norm=batch_norm,
                 y_range=y_range)
 
+    def forward(self, x):
+        x1 = self.conv2dblock(x)
+        x1 = self.conv2d1x1block(x1)
+        x2 = self.conv1dblock(x)
+        x2 = self.conv1d1x1block(x2)
+        x1 = x1.reshape(x1.shape[0], 1, -1)
+
+        out = self.concat((x2, x1))
+        out = self.conv1d(out)
+        out = self.head(out)
+        return out
+
+    def explain(self, input_data):
+        target = input_data.target
+        features = input_data.features
+        median_dict = {}
+        for class_number in input_data.class_labels:
+            class_target_idx = np.where(target == class_number)[0]
+            median_sample = np.median(features[class_target_idx], axis=0)
+            median_dict.update({f'class_{class_number}': median_sample})
+        input_data.supplementary_data = median_dict
+
+        self._explain_by_gradcam(input_data)
+
+    @convert_inputdata_to_torch_dataset
+    def _explain_by_gradcam(self,
+                            input_data,
+                            detach=True,
+                            cpu=True,
+                            apply_relu=True,
+                            cmap='inferno',
+                            figsize=None,
+                            **kwargs):
+
+        att_maps = self.get_attribution_map(model=self,
+                                            modules=[self.conv2dblock, self.conv1dblock],
+                                            features=input_data.x,
+                                            target=input_data.y,
+                                            detach=detach,
+                                            cpu=cpu,
+                                            apply_relu=apply_relu)
+        att_maps[0] = (att_maps[0] - att_maps[0].min()) / (att_maps[0].max() - att_maps[0].min())
+        att_maps[1] = (att_maps[1] - att_maps[1].min()) / (att_maps[1].max() - att_maps[1].min())
+
+        visualise_gradcam(att_maps,
+                          input_data.supplementary_data,
+                          figsize,
+                          cmap,
+                          **kwargs)
+
     def _get_acts_and_grads(self,
                             model,
                             modules,
@@ -120,15 +178,14 @@ def _get_acts_and_grads(self,
                             y=None,
                             detach=True,
                             cpu=False):
-        r"""Returns activations and gradients for given modules in a model and a single input or a batch.
-        Gradients require y value(s). If they are not provided, it will use the predictions. """
+        """Returns activations and gradients for given modules in a model and a single input or a batch.
+        Gradients require y value(s). If they are not provided, it will use the predictions.
+
+        """
         if not isinstance(modules, list):
             modules = [modules]
         x = x[None, None] if x.ndim == 1 else x[None] if x.ndim == 2 else x
-        if cpu:
-            model = model.cpu()
-            x = x.cpu()
-        x = x.permute(1, 0, 2)
+
         with hook_outputs(modules, detach=detach, cpu=cpu) as h_act:
             with hook_outputs(modules, grad=True, detach=detach, cpu=cpu) as h_grad:
                 preds = model.eval()(x)
@@ -145,9 +202,7 @@ def _get_acts_and_grads(self,
         if len(modules) == 1:
             return h_act.stored[0].data, h_grad.stored[0][0].data
         else:
-            return [
-                h.data for h in h_act.stored], [
-                h[0].data for h in h_grad.stored]
+            return [h.data for h in h_act.stored], [h[0].data for h in h_grad.stored]
 
     def get_attribution_map(self,
                             model,
@@ -166,74 +221,26 @@ def _get_attribution_map(A_k, w_ck):
             if L_c.ndim == 3:
                 return L_c.squeeze(0) if L_c.shape[0] == 1 else L_c
             else:
-                return L_c.repeat(
-                    features.shape[1],
-                    1) if L_c.shape[0] == 1 else L_c.unsqueeze(1).repeat(
-                    1,
-                    features.shape[1],
-                    1)
+                return L_c.repeat(features.shape[1],
+                                  1) if L_c.shape[0] == 1 else L_c.unsqueeze(1).repeat(1,
+                                                                                       features.shape[1],
+                                                                                       1)
 
         if features.ndim == 1:
             features = features[None, None]
         elif features.ndim == 2:
             features = features[None]
-        A_k, w_ck = self._get_acts_and_grads(
-            model, modules, features, target, detach=detach, cpu=cpu)
+        A_k, w_ck = self._get_acts_and_grads(model,
+                                             modules,
+                                             features,
+                                             target,
+                                             detach=detach,
+                                             cpu=cpu)
         if isinstance(A_k, list):
-            return [
-                _get_attribution_map(
-                    A_k[i],
-                    w_ck[i]) for i in range(
-                    len(A_k))]
+            return [_get_attribution_map(A_k[i], w_ck[i]) for i in range(len(A_k))]
         else:
             return _get_attribution_map(A_k, w_ck)
 
-    def forward(self, x):
-        x1 = self.conv2dblock(x)
-        x1 = self.conv2d1x1block(x1)
-        x2 = self.conv1dblock(x)
-        x2 = self.conv1d1x1block(x2)
-        out = self.concat((x2, x1))
-        out = self.conv1d(out)
-        out = self.head(out)
-        return out
-
-    def explain(self, input_data):
-        target = input_data.target
-        features = input_data.features
-        median_dict = {}
-        for class_number in input_data.class_labels:
-            class_target_idx = np.where(target == class_number)[0]
-            median_sample = np.median(features[class_target_idx], axis=0)
-            median_dict.update({f'class_{class_number}': median_sample})
-        input_data.supplementary_data = median_dict
-        self._explain_by_gradcam(input_data)
-
-    @convert_inputdata_to_torch_dataset
-    def _explain_by_gradcam(self,
-                            input_data,
-                            detach=True,
-                            cpu=True,
-                            apply_relu=True,
-                            cmap='inferno',
-                            figsize=None,
-                            **kwargs):
-
-        att_maps = self.get_attribution_map(model=self,
-                                            modules=[self.conv2dblock,
-                                                     self.conv1dblock],
-                                            features=input_data.x,
-                                            target=input_data.y,
-                                            detach=detach,
-                                            cpu=cpu,
-                                            apply_relu=apply_relu)
-        att_maps[0] = (att_maps[0] - att_maps[0].min()) / \
-            (att_maps[0].max() - att_maps[0].min())
-        att_maps[1] = (att_maps[1] - att_maps[1].min()) / \
-            (att_maps[1].max() - att_maps[1].min())
-        visualise_gradcam(
-            att_maps, input_data.supplementary_data, figsize, cmap, **kwargs)
-
 
 class XCModel(BaseNeuralModel):
     """Class responsible for Time series transformer (TST) model implementation.
@@ -247,12 +254,14 @@ class XCModel(BaseNeuralModel):
         from examples.fedot.fedot_ex import init_input_data
         from fedot_ind.tools.loader import DataLoader
         from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
+
         train_data, test_data = DataLoader(dataset_name='Lightning7').load_data()
         input_data = init_input_data(train_data[0], train_data[1])
         val_data = init_input_data(test_data[0], test_data[1])
+
         with IndustrialModels():
-            pipeline = PipelineBuilder().add_node('inception_model', params={'epochs': 100,
-                                                                             'batch_size': 10}).build()
+            pipeline = PipelineBuilder().add_node('xcm_model', params={'epochs': 100,
+                                                                       'batch_size': 10}).build()
             pipeline.fit(input_data)
             target = pipeline.predict(val_data).predict
             metric = evaluate_metric(target=test_data[1], prediction=target)
@@ -260,6 +269,7 @@ class XCModel(BaseNeuralModel):
     """
 
     def __init__(self, params: Optional[OperationParameters] = {}):
+        super().__init__(params)
         self.num_classes = params.get('num_classes', 1)
         self.epochs = params.get('epochs', 100)
         self.batch_size = params.get('batch_size', 32)
@@ -268,6 +278,7 @@ def _init_model(self, ts):
         self.model = XCM(input_dim=ts.features.shape[1],
                          output_dim=self.num_classes,
                          seq_len=ts.features.shape[2]).to(default_device())
+        self.model_for_inference = self.model
         optimizer = optim.Adam(self.model.parameters(), lr=0.001)
         if ts.num_classes == 2:
             loss_fn = nn.CrossEntropyLoss()
diff --git a/fedot_ind/core/models/nn/network_impl/mini_rocket.py b/fedot_ind/core/models/nn/network_impl/mini_rocket.py
index 150654ea6..03c8ec1a4 100644
--- a/fedot_ind/core/models/nn/network_impl/mini_rocket.py
+++ b/fedot_ind/core/models/nn/network_impl/mini_rocket.py
@@ -208,7 +208,7 @@ def get_minirocket_features(data,
             _features.append(model(oi))
     features = torch.cat(_features).unsqueeze(-1)
     if convert_to_numpy:
-        return features.cpu().numpy()
+        return features.cpu().detach().numpy()
     else:
         return features
diff --git a/fedot_ind/core/models/nn/network_impl/omni_scale.py b/fedot_ind/core/models/nn/network_impl/omni_scale.py
index cac2c2f58..a03ff57a5 100644
--- a/fedot_ind/core/models/nn/network_impl/omni_scale.py
+++ b/fedot_ind/core/models/nn/network_impl/omni_scale.py
@@ -111,7 +111,7 @@ class OmniScaleModel(BaseNeuralModel):
         input_data = init_input_data(train_data[0], train_data[1])
         val_data = init_input_data(test_data[0], test_data[1])
         with IndustrialModels():
-            pipeline = PipelineBuilder().add_node('inception_model', params={'epochs': 100,
+            pipeline = PipelineBuilder().add_node('omniscale_model', params={'epochs': 100,
                                                                              'batch_size': 10}).build()
             pipeline.fit(input_data)
             target = pipeline.predict(val_data).predict
@@ -152,6 +152,7 @@ def _init_model(self, ts):
             loss_fn = MULTI_CLASS_CROSS_ENTROPY()
         return loss_fn, optimizer
 
+    @convert_to_3d_torch_array
     def _fit_model(self, ts: InputData, split_data: bool = False):
         loss_fn, optimizer = self._init_model(ts)
         train_loader, val_loader = self._prepare_data(ts, split_data)
diff --git a/fedot_ind/core/models/nn/network_impl/patch_tst.py b/fedot_ind/core/models/nn/network_impl/patch_tst.py
index c584e01a4..f1029f8f2 100644
--- a/fedot_ind/core/models/nn/network_impl/patch_tst.py
+++ b/fedot_ind/core/models/nn/network_impl/patch_tst.py
@@ -295,8 +295,13 @@ def _train_loop(self, model,
                 loss.backward()
                 model.float()
                 optimizer.step()
-                adjust_learning_rate(optimizer, scheduler,
-                                     epoch + 1, args, printout=False)
+
+                adjust_learning_rate(optimizer=optimizer,
+                                     scheduler=scheduler,
+                                     epoch=epoch + 1,
+                                     lradj=args['lradj'],
+                                     printout=False,
+                                     learning_rate=self.learning_rate)
                 scheduler.step()
         train_loss = np.average(train_loss)
         print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f}".format(
diff --git a/fedot_ind/core/models/nn/network_impl/transformer.py b/fedot_ind/core/models/nn/network_impl/transformer.py
index 5e509d952..7e956b9f7 100644
--- a/fedot_ind/core/models/nn/network_impl/transformer.py
+++ b/fedot_ind/core/models/nn/network_impl/transformer.py
@@ -6,8 +6,8 @@
 from torch.nn.modules.transformer import TransformerEncoder, TransformerEncoderLayer
 
 from fedot_ind.core.architecture.settings.computational import default_device
-from .base_nn_model import BaseNeuralModel
-from ..network_modules.layers.linear_layers import Max, Permute, Transpose
+from fedot_ind.core.models.nn.network_impl.base_nn_model import BaseNeuralModel
+from fedot_ind.core.models.nn.network_modules.layers.linear_layers import Max, Permute, Transpose
 
 
 class TransformerModule(Module):
@@ -52,11 +52,10 @@ def __init__(self,
         self.outlinear = nn.Linear(d_model, output_dim)
 
     def forward(self, x):
-        x = self.permute(x)  # bs x nvars x seq_len -> seq_len x bs x nvars
+        x = self.permute(x.squeeze())  # bs x nvars x seq_len -> seq_len x bs x nvars
         x = self.inlinear(x)  # seq_len x bs x nvars -> seq_len x bs x d_model
         x = self.relu(x)
         x = self.transformer_encoder(x)
-        # seq_len x bs x d_model -> bs x seq_len x d_model
         x = self.transpose(x)
         x = self.max(x)
         x = self.relu(x)
@@ -69,35 +68,25 @@ class TransformerModel(BaseNeuralModel):
 
     Attributes:
         self.num_features: int, the number of features.
-
-    Example:
-        To use this operation you can create pipeline as follows::
-            from fedot.core.pipelines.pipeline_builder import PipelineBuilder
-            from examples.fedot.fedot_ex import init_input_data
-            from fedot_ind.tools.loader import DataLoader
-            from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
-            train_data, test_data = DataLoader(dataset_name='Lightning7').load_data()
-            input_data = init_input_data(train_data[0], train_data[1])
-            val_data = init_input_data(test_data[0], test_data[1])
-            with IndustrialModels():
-                pipeline = PipelineBuilder().add_node('tst_model', params={'epochs': 100,
-                                                                           'batch_size': 10}).build()
-                pipeline.fit(input_data)
-                target = pipeline.predict(val_data).predict
-                metric = evaluate_metric(target=test_data[1], prediction=target)
+        self.epochs: int, the number of epochs.
+        self.batch_size: int, the batch size.
""" def __init__(self, params: Optional[OperationParameters] = {}): + super().__init__(params) self.num_classes = params.get('num_classes', 1) self.epochs = params.get('epochs', 10) self.batch_size = params.get('batch_size', 20) def _init_model(self, ts): self.model = TransformerModule( - input_dim=ts.features.shape[1], + input_dim=ts.features.shape[2], output_dim=self.num_classes).to( default_device()) + + self.model_for_inference = self.model + optimizer = optim.Adam(self.model.parameters(), lr=0.001) loss_fn = nn.CrossEntropyLoss() return loss_fn, optimizer diff --git a/fedot_ind/core/models/nn/network_impl/tst.py b/fedot_ind/core/models/nn/network_impl/tst.py index fca9b45df..fa7e426a4 100644 --- a/fedot_ind/core/models/nn/network_impl/tst.py +++ b/fedot_ind/core/models/nn/network_impl/tst.py @@ -8,6 +8,7 @@ from fedot.core.pipelines.pipeline_builder import PipelineBuilder from torch import nn, optim, Tensor +from fedot_ind.api.utils.data import init_input_data from fedot_ind.core.architecture.settings.computational import default_device from fedot_ind.core.models.nn.network_impl.base_nn_model import BaseNeuralModel from fedot_ind.core.models.nn.network_modules.activation import get_activation_fn @@ -16,6 +17,7 @@ from fedot_ind.core.models.nn.network_modules.layers.conv_layers import Conv1d from fedot_ind.core.models.nn.network_modules.layers.linear_layers import Flatten, Transpose from fedot_ind.core.models.nn.network_modules.layers.padding_layers import Pad1d +from fedot_ind.tools.loader import DataLoader class _TSTEncoderLayer(Module): @@ -294,18 +296,25 @@ def _init_model(self, ts): dataset_list = ['Lightning2'] result_dict = {} pipeline_dict = { - 'omniscale_model': PipelineBuilder().add_node( - 'tst_model', - params={ - 'epochs': 50, - 'batch_size': 32}), - 'quantile_rf_model': PipelineBuilder() .add_node('quantile_extractor') .add_node('rf'), - 'composed_model': PipelineBuilder() .add_node( - 'tst_model', - params={ - 'epochs': 50, - 'batch_size': 32}) .add_node( - 'quantile_extractor', - branch_idx=1) .add_node( - 'rf', - branch_idx=1) .join_branches('logit')} + 'omniscale_model': PipelineBuilder().add_node('tst_model', + params={'epochs': 50, + 'batch_size': 32} + ), + + 'quantile_rf_model': PipelineBuilder().add_node('quantile_extractor').add_node('rf'), + + 'composed_model': PipelineBuilder().add_node('tst_model', params={'epochs': 50, 'batch_size': 32}) + .add_node('quantile_extractor', branch_idx=1) + .add_node('rf', branch_idx=1) + .join_branches('logit')} + + train_data, test_data = DataLoader(dataset_list[0]).load_data() + input_train = init_input_data(train_data[0], train_data[1]) + input_test = init_input_data(test_data[0], test_data[1]) + + for name, ppl in pipeline_dict.items(): + print(f'<------{name} pipeline fit------>') + ppl = ppl.build() + ppl.fit(input_train) + pred = ppl.predict(input_test) + print(f'<------{name} pipeline fitted------>') diff --git a/fedot_ind/core/models/nn/network_modules/layers/conv_layers.py b/fedot_ind/core/models/nn/network_modules/layers/conv_layers.py index f7b5f1a66..df3ba9571 100644 --- a/fedot_ind/core/models/nn/network_modules/layers/conv_layers.py +++ b/fedot_ind/core/models/nn/network_modules/layers/conv_layers.py @@ -19,9 +19,9 @@ class Conv2dSame(Module): """ def __init__( - self, ni, nf, ks=( - 3, 3), stride=( - 1, 1), dilation=( + self, ni, nf, ks=( + 3, 3), stride=( + 1, 1), dilation=( 1, 1), **kwargs): if isinstance(ks, Integral): ks = (ks, ks) @@ -75,42 +75,37 @@ def Conv2d( class CausalConv1d(torch.nn.Conv1d): def 
__init__(self, ni, nf, ks, stride=1, dilation=1, groups=1, bias=True): - super( - CausalConv1d, - self).__init__( - ni, - nf, - kernel_size=ks, - stride=stride, - padding=0, - dilation=dilation, - groups=groups, - bias=bias) + super(CausalConv1d, self).__init__(ni, + nf, + kernel_size=ks, + stride=stride, + padding=0, + dilation=dilation, + groups=groups, + bias=bias) self.__padding = (ks - 1) * dilation def forward(self, input): - return super( - CausalConv1d, self).forward( - F.pad( - input, (self.__padding, 0))) + return super(CausalConv1d, self).forward(F.pad(input, + (self.__padding, 0)) + ) @delegates(nn.Conv1d.__init__) -def Conv1d( - ni, - nf, - kernel_size=None, - ks=None, - stride=1, - padding='same', - dilation=1, - init='auto', - bias_std=0.01, - **kwargs): +def Conv1d(ni, + nf, + kernel_size=None, + ks=None, + stride=1, + padding='same', + dilation=1, + init='auto', + bias_std=0.01, + **kwargs): """conv1d layer with padding='same', 'causal', 'valid', or any integer (defaults to 'same')""" - assert not ( - kernel_size and ks), 'use kernel_size or ks but not both simultaneously' + assert not (kernel_size and ks), 'use kernel_size or ks but not both simultaneously' assert kernel_size is not None or ks is not None, 'you need to pass a ks' + kernel_size = kernel_size or ks if padding == 'same': if kernel_size % 2 == 1: @@ -119,9 +114,7 @@ def Conv1d( nf, kernel_size, stride=stride, - padding=kernel_size // - 2 * - dilation, + padding=kernel_size // 2 * dilation, dilation=dilation, **kwargs) else: diff --git a/fedot_ind/core/models/nn/network_modules/layers/linear_layers.py b/fedot_ind/core/models/nn/network_modules/layers/linear_layers.py index 362bec46e..714b2fffd 100644 --- a/fedot_ind/core/models/nn/network_modules/layers/linear_layers.py +++ b/fedot_ind/core/models/nn/network_modules/layers/linear_layers.py @@ -22,13 +22,24 @@ def init_lin_zero(m): lin_zero_init = init_lin_zero -class Flatten(nn.Module): - def __init__(self, out_features): - super(Flatten, self).__init__() - self.output_dim = out_features +# class Flatten(nn.Module): +# def __init__(self, out_features): +# super(Flatten, self).__init__() +# self.output_dim = out_features +# +# def forward(self, x): +# return x.view(-1, self.output_dim) +# + +class Flatten(Module): def forward(self, x): - return x.view(-1, self.output_dim) + bs, c, h, w = x.shape + flattened_tensor = x.reshape(bs, c, h * w) + return flattened_tensor + + def __repr__(self): + return f"{self.__class__.__name__}" class Reshape(nn.Module): diff --git a/fedot_ind/core/models/nn/network_modules/layers/pooling_layers.py b/fedot_ind/core/models/nn/network_modules/layers/pooling_layers.py index 6bb3836d2..3db7b4b01 100644 --- a/fedot_ind/core/models/nn/network_modules/layers/pooling_layers.py +++ b/fedot_ind/core/models/nn/network_modules/layers/pooling_layers.py @@ -220,9 +220,8 @@ def forward(self, x): elif self.method == 'mean': return torch.mean(x, -1) if self.seq_last else torch.mean(x, 1) elif self.method == 'max-mean': - return torch.cat([torch.max(x, - - 1)[0] if self.seq_last else torch.max(x, 1)[0], torch.mean(x, - - 1) if self.seq_last else torch.mean(x, 1)], 1) + return torch.cat([torch.max(x, - 1)[0] if self.seq_last else torch.max(x, 1)[0], + torch.mean(x, - 1) if self.seq_last else torch.mean(x, 1)], 1) elif self.method == 'flatten': return x.flatten(1) elif self.method == 'linear' or self.method == 'conv1d': diff --git a/fedot_ind/core/models/nn/network_modules/losses.py b/fedot_ind/core/models/nn/network_modules/losses.py index 
7cb6758f0..902fec508 100644 --- a/fedot_ind/core/models/nn/network_modules/losses.py +++ b/fedot_ind/core/models/nn/network_modules/losses.py @@ -61,8 +61,10 @@ def forward(self, loss_normalized (torch.Tensor): loss, where regularization parameters are 1. """ # res = torch.sum(input ** 2, dim=0).reshape(self.n_t, -1) - res = torch.mean(input_, axis=0).reshape(self.n_t, -1) - target = torch.mean(target, axis=0).reshape(self.n_t, -1) + res = torch.sum(input_ ** 2, dim=1).reshape(self.n_t, -1) + + # target = torch.mean(target, axis=0).reshape(self.n_t, -1) + target = torch.mean(target.reshape(self.n_t, -1), axis=0) m = torch.triu( torch.ones( (self.n_t, diff --git a/fedot_ind/core/models/nn/network_modules/other.py b/fedot_ind/core/models/nn/network_modules/other.py index 1f9ffd7a6..3633918e0 100644 --- a/fedot_ind/core/models/nn/network_modules/other.py +++ b/fedot_ind/core/models/nn/network_modules/other.py @@ -22,7 +22,7 @@ def pass_through(X): return X -def test_module_to_torchscript( +def if_module_to_torchscript( m: torch.nn.Module, inputs: Tensor, trace: bool = True, @@ -115,46 +115,6 @@ def init_lin_zero(m): lin_zero_init = init_lin_zero -# @delegates(nn.Conv2d.__init__) - - -# Conv = named_partial('Conv', ConvBlock, norm=None, act=None) -# ConvBN = named_partial('ConvBN', ConvBlock, norm='Batch', act=None) -# CoordConv = named_partial('CoordConv', ConvBlock, norm=None, act=None, coord=True) -# SepConv = named_partial('SepConv', ConvBlock, norm=None, act=None, separable=True) - - -# class ResBlock1dPlus(Module): -# "Resnet block from `ni` to `nh` with `stride`" -# -# @delegates(ConvLayer.__init__) -# def __init__(self, expansion, ni, nf, coord=False, stride=1, groups=1, reduction=None, nh1=None, nh2=None, dw=False, -# g2=1, -# sa=False, sym=False, norm='Batch', zero_norm=True, act_cls=defaults.activation, ks=3, -# pool=AvgPool, pool_first=True, **kwargs): -# if nh2 is None: nh2 = nf -# if nh1 is None: nh1 = nh2 -# nf, ni = nf * expansion, ni * expansion -# k0 = dict(norm=norm, zero_norm=False, act=act_cls, **kwargs) -# k1 = dict(norm=norm, zero_norm=zero_norm, act=None, **kwargs) -# convpath = [ConvBlock(ni, nh2, ks, coord=coord, stride=stride, groups=ni if dw else groups, **k0), -# ConvBlock(nh2, nf, ks, coord=coord, groups=g2, **k1) -# ] if expansion == 1 else [ -# ConvBlock(ni, nh1, 1, coord=coord, **k0), -# ConvBlock(nh1, nh2, ks, coord=coord, stride=stride, groups=nh1 if dw else groups, **k0), -# ConvBlock(nh2, nf, 1, coord=coord, groups=g2, **k1)] -# if reduction: convpath.append(SEModule(nf, reduction=reduction, act_cls=act_cls)) -# if sa: convpath.append(SimpleSelfAttention(nf, ks=1, sym=sym)) -# self.convpath = nn.Sequential(*convpath) -# idpath = [] -# if ni != nf: idpath.append(ConvBlock(ni, nf, 1, coord=coord, act=None, **kwargs)) -# if stride != 1: idpath.insert((1, 0)[pool_first], pool(stride, ndim=1, ceil_mode=True)) -# self.idpath = nn.Sequential(*idpath) -# self.act = defaults.activation(inplace=True) if act_cls is defaults.activation else act_cls() -# -# def forward(self, x): -# return self.act(self.convpath(x) + self.idpath(x)) - class DropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
@@ -227,7 +187,7 @@ def forward(self, x): return y -class Temp_Scale(Module): +class TempScale(Module): """Used to perform Temperature Scaling (dirichlet=False) or Single-parameter Dirichlet calibration (dirichlet=True)""" @@ -277,13 +237,13 @@ def get_calibrator(calibrator=None, n_classes=1, **kwargs): if calibrator is None or not calibrator: return Noop elif calibrator.lower() == 'temp': - return Temp_Scale(dirichlet=False, **kwargs) + return TempScale(dirichlet=False, **kwargs) elif calibrator.lower() == 'vector': return VectorScale(n_classes=n_classes, dirichlet=False, **kwargs) elif calibrator.lower() == 'matrix': return MatrixScale(n_classes=n_classes, dirichlet=False, **kwargs) elif calibrator.lower() == 'dtemp': - return Temp_Scale(dirichlet=True, **kwargs) + return TempScale(dirichlet=True, **kwargs) elif calibrator.lower() == 'dvector': return VectorScale(n_classes=n_classes, dirichlet=True, **kwargs) elif calibrator.lower() == 'dmatrix': diff --git a/fedot_ind/core/models/ts_forecasting/ssa_forecaster.py b/fedot_ind/core/models/ts_forecasting/ssa_forecaster.py index a99f62344..f34c69b1d 100644 --- a/fedot_ind/core/models/ts_forecasting/ssa_forecaster.py +++ b/fedot_ind/core/models/ts_forecasting/ssa_forecaster.py @@ -52,8 +52,10 @@ def __init__(self, params: Optional[OperationParameters] = None): 'tuning_timeout': 20, 'tuning_early_stop': 20, 'tuner': SimultaneousTuner} - component_mode_dict = {'topological': PipelineBuilder().add_node('lagged').add_node( - 'topological_features').add_node('treg'), 'ar': PipelineBuilder().add_node('ar')} + component_mode_dict = { + 'topological': PipelineBuilder().add_node('lagged').add_node('topological_features').add_node('treg'), + 'ar': PipelineBuilder().add_node('ar') + } self.window_size_method = params.get('window_size_method') self.history_lookback = max(params.get('history_lookback', 0), 100) @@ -85,10 +87,8 @@ def _tune_component_model(self, model_to_tune, component): def _combine_trajectory(self, U, VT, n_components): if len(self._rank_thr) > 2: - self.PCT = np.concatenate([U[:, 0].reshape(1, - - 1), np.array([np.sum([U[:, i], U[:, i + - 1]], axis=0) for i in self._rank_thr if i != 0 and i % - 2 != 0])]).T + self.PCT = np.concatenate([U[:, 0].reshape( + 1, - 1), np.array([np.sum([U[:, i], U[:, i + 1]], axis=0) for i in self._rank_thr if i != 0 and i % 2 != 0])]).T current_dynamics = np.concatenate([VT[0, :].reshape(1, -1), np.array([np.sum( [VT[i, :], VT[i + 1, :]], axis=0) for i in self._rank_thr if i != 0 and i % 2 != 0])]) @@ -154,7 +154,7 @@ def __predict_for_fit(self, ts): reconstructed_features = np.array(components_correlation).sum(axis=0) return reconstructed_features - def predict_for_fit(self, input_data: InputData) -> OutputData: + def predict_for_fit(self, input_data: InputData) -> np.ndarray: if self.horizon is None: self.horizon = input_data.task.task_params.forecast_length if input_data.features.shape[0] > self.history_lookback: @@ -165,9 +165,12 @@ def predict_for_fit(self, input_data: InputData) -> OutputData: else: self.history_lookback = None self._decomposer = EigenBasisImplementation( - { - 'low_rank_approximation': self.low_rank_approximation, - 'rank_regularization': 'explained_dispersion'}) + OperationParameters( + low_rank_approximation=self.low_rank_approximation, + rank_regularization='explained_dispersion' + ) + ) + predict = self.__predict_for_fit(input_data) return predict diff --git a/fedot_ind/core/operation/IndustrialCachableOperation.py b/fedot_ind/core/operation/IndustrialCachableOperation.py index 
008f43bac..e304fe660 100644 --- a/fedot_ind/core/operation/IndustrialCachableOperation.py +++ b/fedot_ind/core/operation/IndustrialCachableOperation.py @@ -74,26 +74,25 @@ def transform( if use_cache: class_params = { k: v for k, - v in self.__dict__.items() if k not in [ - 'cacher', - 'data_type', - 'params', - 'n_processes', - 'logging_params', - 'logger', - 'relevant_features']} - - hashed_info = self.cacher.hash_info( - data=input_data.features, - operation_info=class_params.__repr__()) + v in self.__dict__.items() if k not in ['cacher', + 'data_type', + 'params', + 'n_processes', + 'logging_params', + 'logger', + 'relevant_features']} + + hashed_info = self.cacher.hash_info(data=input_data.features, + operation_info=class_params.__repr__()) try: predict = self.try_load_from_cache(hashed_info) except FileNotFoundError: predict = self._transform(input_data) self.cacher.cache_data(hashed_info, predict) - predict = self._convert_to_output( - input_data, predict, data_type=self.data_type) + predict = self._convert_to_output(input_data, + predict, + data_type=self.data_type) return predict else: transformed_features = self._transform(input_data) diff --git a/fedot_ind/core/operation/transformation/basis/eigen_basis.py b/fedot_ind/core/operation/transformation/basis/eigen_basis.py index 084f64bbd..bab80559e 100644 --- a/fedot_ind/core/operation/transformation/basis/eigen_basis.py +++ b/fedot_ind/core/operation/transformation/basis/eigen_basis.py @@ -96,11 +96,11 @@ def _transform(self, input_data: InputData) -> np.array: features = NumpyConverter(data=features).convert_to_torch_format() def tensor_decomposition(x): - return ListMonad(self._get_multidim_basis( - x)) if self.tensor_approximation else self._channel_decompose(x) + return ListMonad( + self._get_multidim_basis(x) + ) if self.tensor_approximation else self._channel_decompose(x) - basis = np.array(Either.insert(features).then( - tensor_decomposition).value[0]) + basis = np.array(Either.insert(features).then(tensor_decomposition).value[0]) predict = self._convert_basis_to_predict(basis, input_data) return predict diff --git a/fedot_ind/core/repository/constanst_repository.py b/fedot_ind/core/repository/constanst_repository.py index 26a66fd48..8f1a066a3 100644 --- a/fedot_ind/core/repository/constanst_repository.py +++ b/fedot_ind/core/repository/constanst_repository.py @@ -250,7 +250,7 @@ class FedotOperationConstant(Enum): ] FEDOT_ASSUMPTIONS = { - 'classification': PipelineBuilder().add_node('channel_filtration'). 
add_node('quantile_extractor').add_node('xgboost'), + 'classification': PipelineBuilder().add_node('channel_filtration').add_node('quantile_extractor').add_node('xgboost'), 'regression': PipelineBuilder().add_node('quantile_extractor').add_node('treg'), 'ts_forecasting': PipelineBuilder().add_node( 'eigen_basis', @@ -610,37 +610,46 @@ class BenchmarkDatasets(Enum): class UnitTestConstant(Enum): VALID_LINEAR_CLF_PIPELINE = { - 'eigen_statistical': [ - 'eigen_basis', 'quantile_extractor', 'logit'], 'channel_filtration_statistical': [ - 'channel_filtration', 'quantile_extractor', 'logit'], 'fourier_statistical': [ - 'fourier_basis', 'quantile_extractor', 'logit'], 'wavelet_statistical': [ - 'wavelet_basis', 'quantile_extractor', 'logit'], 'recurrence_clf': [ - 'recurrence_extractor', 'logit'], 'riemann_clf': [ - 'riemann_extractor', 'logit'], 'topological_clf': [ - 'topological_extractor', 'logit'], 'statistical_clf': [ - 'quantile_extractor', 'logit'], 'statistical_lgbm': [ - 'quantile_extractor', 'lgbm'], 'composite_clf': { - 0: ['quantile_extractor'], 1: ['riemann_extractor'], 2: [ - 'fourier_basis', 'quantile_extractor'], 'head': 'mlp'}} + 'eigen_statistical': ['eigen_basis', 'quantile_extractor', 'logit'], + 'channel_filtration_statistical': ['channel_filtration', 'quantile_extractor', 'logit'], + 'fourier_statistical': ['fourier_basis', 'quantile_extractor', 'logit'], + 'wavelet_statistical': ['wavelet_basis', 'quantile_extractor', 'logit'], + 'recurrence_clf': ['recurrence_extractor', 'logit'], + 'riemann_clf': ['riemann_extractor', 'logit'], + 'topological_clf': ['topological_extractor', 'logit'], + 'statistical_clf': ['quantile_extractor', 'logit'], + 'statistical_lgbm': ['quantile_extractor', 'lgbm'], + 'composite_clf': {0: ['quantile_extractor'], + 1: ['riemann_extractor'], + 2: ['fourier_basis', 'quantile_extractor'], + 'head': 'mlp' + }} + VALID_LINEAR_REG_PIPELINE = { - 'eigen_statistical_reg': [ - 'eigen_basis', 'quantile_extractor', 'treg'], 'channel_filtration_statistical_reg': [ - 'channel_filtration', 'quantile_extractor', 'treg'], 'fourier_statistical_reg': [ - 'fourier_basis', 'quantile_extractor', 'treg'], 'wavelet_statistical_reg': [ - 'wavelet_basis', 'quantile_extractor', 'treg'], 'recurrence_reg': [ - 'recurrence_extractor', 'treg'], 'topological_reg': [ - 'topological_extractor', 'treg'], 'statistical_reg': [ - 'quantile_extractor', 'treg'], 'statistical_lgbmreg': [ - 'quantile_extractor', 'lgbmreg'], 'composite_reg': { - 0: ['quantile_extractor'], 1: ['topological_extractor'], 2: [ - 'fourier_basis', 'quantile_extractor'], 'head': 'treg'}} - VALID_LINEAR_TSF_PIPELINE = { - 'stl_arima': ['stl_arima'], 'topological_lgbm': [ - 'topological_extractor', 'lgbmreg'], 'ar': ['ar'], 'eigen_autoregression': [ - 'eigen_basis', 'ar'], 'smoothed_ar': [ - 'smoothing', 'ar'], 'gaussian_ar': [ - 'gaussian_filter', 'ar'], 'glm': ['glm'], 'nbeats': ['nbeats_model'], - 'tcn': ['tcn_model']} + 'eigen_statistical_reg': ['eigen_basis', 'quantile_extractor', 'treg'], + 'channel_filtration_statistical_reg': ['channel_filtration', 'quantile_extractor', 'treg'], + 'fourier_statistical_reg': ['fourier_basis', 'quantile_extractor', 'treg'], + 'wavelet_statistical_reg': ['wavelet_basis', 'quantile_extractor', 'treg'], + 'recurrence_reg': ['recurrence_extractor', 'treg'], + 'topological_reg': ['topological_extractor', 'treg'], + 'statistical_reg': ['quantile_extractor', 'treg'], + 'statistical_lgbmreg': ['quantile_extractor', 'lgbmreg'], + 'composite_reg': {0: ['quantile_extractor'], + 1: 
['topological_extractor'], + 2: ['fourier_basis', 'quantile_extractor'], + 'head': 'treg'} + } + + VALID_LINEAR_TSF_PIPELINE = {'stl_arima': ['stl_arima'], + 'topological_lgbm': ['topological_extractor', 'lgbmreg'], + 'ar': ['ar'], + 'eigen_autoregression': ['eigen_basis', 'ar'], + 'smoothed_ar': ['smoothing', 'ar'], + 'gaussian_ar': ['gaussian_filter', 'ar'], + 'glm': ['glm'], + 'nbeats': ['nbeats_model'], + 'tcn': ['tcn_model'] + } STAT_METHODS = FeatureConstant.STAT_METHODS.value diff --git a/fedot_ind/core/repository/data/default_operation_params.json b/fedot_ind/core/repository/data/default_operation_params.json index 822d1d684..678a56fbb 100644 --- a/fedot_ind/core/repository/data/default_operation_params.json +++ b/fedot_ind/core/repository/data/default_operation_params.json @@ -364,13 +364,13 @@ "nbeats_model": { "epochs": 100, "batch_size": 32, - "degree_of_polynomial": 3, + "n_stacks": 10, "n_trend_blocks": 3, - "n_trend_layers": 3, "n_seasonality_blocks": 2, - "n_seasonality_layers ": 2, "n_of_harmonics": 1, - "n_stacks": 10, + "degree_of_polynomial": 3, + "n_trend_layers": 3, + "n_seasonality_layers ": 2, "layers": 4 }, "lora_model": { diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index d752f827b..2d78c228e 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -81,11 +81,17 @@ 'activation': {'hyperopt-dist': hp.choice, 'sampling-scope': [ ['LeakyReLU', 'SwishBeta', 'Tanh', 'Softmax', 'SmeLU', 'Mish']]}}, + 'xcm_model': + {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(150, 500, 50)]]}, + 'activation': {'hyperopt-dist': hp.choice, + 'sampling-scope': [ + ['LeakyReLU', 'SwishBeta', 'Tanh', 'Softmax', 'SmeLU', 'Mish']]}}, + 'tcn_model': {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(150, 500, 50)]]}, - 'activation': {'hyperopt-dist': hp.choice, - 'sampling-scope': [ - ['LeakyReLU', 'SwishBeta', 'Tanh', 'Softmax', 'SmeLU', 'Mish']]}}, + 'activation': {'hyperopt-dist': hp.choice, + 'sampling-scope': [ + ['LeakyReLU', 'SwishBeta', 'Tanh', 'Softmax', 'SmeLU', 'Mish']]}}, 'nbeats_model': {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(50, 200, 20)]]}, 'batch_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[8, 16, 32]]}, diff --git a/fedot_ind/tools/example_utils.py b/fedot_ind/tools/example_utils.py index fcba4baf6..f944b5a11 100644 --- a/fedot_ind/tools/example_utils.py +++ b/fedot_ind/tools/example_utils.py @@ -1,4 +1,5 @@ import os +import random from pathlib import Path from typing import Union @@ -10,6 +11,10 @@ from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.metrics.metrics_implementation import calculate_forecasting_metric from fedot_ind.tools.loader import DataLoader +from fedot.core.data.data import InputData +from fedot.core.data.data_split import train_test_data_setup +from fedot.core.repository.dataset_types import DataTypesEnum +from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams ts_datasets = { 'm4_yearly': Path(PROJECT_PATH, 'examples', 'data', 'ts', 'M4Yearly.csv'), @@ -157,3 +162,50 @@ def create_comprasion_df(df, metric: str = 'rmse'): lambda row: 'Win' if row.loc['Difference_industrial_NBEATS'] > 0 else 'Loose', axis=1) return df_full + + +def get_ts_data(dataset='m4_monthly', horizon: int = 30, m4_id=None): + ds, group = dataset.split('_') + ds = ds.lower() + if ds == 'm4': + from datasetsforecast.m4 
import M4 as bench + elif ds == 'm5': + from datasetsforecast.m5 import M5 as bench + else: + raise ValueError('Dataset not found') + + df_ts, _, ids = bench.load(directory=PROJECT_PATH + '/examples/data/ts', + group=group.capitalize(), + cache=True) + + if m4_id is None: + m4_id = random.choice(ids['unique_id'].unique()) + + time_series = df_ts[df_ts['unique_id'] == m4_id]['y'] + # time_series = pd.read_csv(ts_datasets[dataset]) + + task = Task(TaskTypesEnum.ts_forecasting, + TsForecastingParams(forecast_length=horizon)) + # if not m4_id: + # label = random.choice(np.unique(time_series['label'])) + # else: + # label = m4_id + # print(label) + # time_series = time_series[time_series['label'] == label] + + # if 'datetime' in time_series.columns: + # idx = pd.to_datetime(time_series['datetime'].values) + # else: + # # non datetime indexes + # idx = time_series['idx'].values + + # time_series = time_series['value'].values + # train_input = InputData(idx=idx, + train_input = InputData(idx=time_series.index, + features=time_series.values, + target=time_series.values, + task=task, + data_type=DataTypesEnum.ts) + train_data, test_data = train_test_data_setup(train_input) + return train_data, test_data, m4_id + # return train_data, test_data, label diff --git a/fedot_ind/tools/synthetic/ts_generator.py b/fedot_ind/tools/synthetic/ts_generator.py index 73f3db2df..6ac42bfda 100644 --- a/fedot_ind/tools/synthetic/ts_generator.py +++ b/fedot_ind/tools/synthetic/ts_generator.py @@ -199,28 +199,36 @@ def savitzky_golay( if __name__ == '__main__': - ts_config = { - 'ts_type': 'random_walk', - # 'ts_type': 'sin', + sin_config = { + 'ts_type': 'sin', 'length': 1000, - - # for sin wave 'amplitude': 10, - 'period': 500, + 'period': 500 + } - # for random walk - 'start_val': 36.6, + random_walk_config = { + 'ts_type': 'random_walk', + 'length': 1000, + 'start_val': 36.6 + } - # for auto regression + auto_regression_config = { + 'ts_type': 'auto_regression', + 'length': 1000, 'ar_params': [0.5, -0.3, 0.2], - 'initial_values': None, + 'initial_values': None + } - # for smooth normal - 'window_size': 300} + smooth_normal_config = { + 'ts_type': 'smooth_normal', + 'length': 1000, + 'window_size': 300 + } - ts_generator = TimeSeriesGenerator(ts_config) - ts = ts_generator.get_ts() + for config in [sin_config, random_walk_config, auto_regression_config, smooth_normal_config]: + ts_generator = TimeSeriesGenerator(config) + ts = ts_generator.get_ts() + plt.plot(ts) - plt.plot(ts) plt.show() _ = 1 diff --git a/pyproject.toml b/pyproject.toml index badcf0cd4..d40cd8e47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,45 +8,36 @@ readme = "README_en.rst" [tool.poetry.dependencies] python = ">=3.9,<3.12" -fedot = "^0.7.3" catboost = [ - {version = "1.1.1", markers = "sys_platform == 'Darwin'"}, - {version = "*", markers = "sys_platform != 'Darwin'"} + {version = "1.1.1", markers = "sys_platform == 'darwin'"}, + {version = "*", markers = "sys_platform != 'darwin'"} ] -mklpy = "0.6" -PyMonad = "2.4.0" -PyWavelets = "1.4.1" -giotto_tda = "0.6.0" -ripser = "0.6.4" -fastcore = "~1.5.29" -fastai = "~2.7.14" -distributed = "~2023.12.0" -datasetsforecast = "~0.0.8" -tensorly = "0.8.1" +fedot = "^0.7.3" torch = "~2.2.0" torchvision = "~0.17.0" -statsforecast = "1.5.0" +setuptools = "^70.0.0" chardet = "~5.2.0" -matplotlib = "~3.8.2" -scipy = ">=1.10.0" +tensorly = "0.8.1" +pymonad = "2.4.0" +pywavelets = "1.4.1" +giotto-tda = ">=0.6.0" +ripser = "0.6.4" +fastcore = "~1.5.29" +fastai = "~2.7.14" sktime = ">=0.16.1" -tqdm = 
"^4.65.0" +distributed = "~2023.12.0" +mklpy = "0.6" librosa = "~0.10.1" -pillow = "~10.2.0" pyriemann = "~0.5" -seaborn = "~0.13.2" -datasets = "^2.18.0" -pandas = "^1.5.3" -numpy = "1.24.4" - -[tool.poetry.group.docs.dependencies] +pyarrow = "15.0.1" +datasetsforecast = "^0.0.8" +datasets = "^2.19.2" +matplotlib = "~3.8.2" +numpy = "1.23.2" +pytest-cov = "^5.0.0" sphinx-rtd-theme = "^2.0.0" -sphinx = "^7.3.7" -autodocsumm = "^0.2.12" -[tool.poetry.group.test.dependencies] -pytest = "~8.0.0" -pytest-cov = "^5.0.0" + [build-system] diff --git a/tests/integration/ts_classification/test_ts_classification_advanced.py b/tests/integration/ts_classification/test_ts_classification_advanced.py index 6b35eae9a..083dd092d 100644 --- a/tests/integration/ts_classification/test_ts_classification_advanced.py +++ b/tests/integration/ts_classification/test_ts_classification_advanced.py @@ -1,43 +1,45 @@ -import itertools - -import numpy as np -import pytest - from fedot_ind.api.main import FedotIndustrial from fedot_ind.tools.loader import DataLoader +from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator -@pytest.fixture def multi_data(): train_data, test_data = DataLoader(dataset_name='Epilepsy').load_data() return train_data, test_data -@pytest.fixture def uni_data(): train_data, test_data = DataLoader(dataset_name='Lightning7').load_data() return train_data, test_data -@pytest.mark.parametrize('data, strategy', - [itertools.combinations([uni_data, - multi_data], - ['federated_automl', - 'kernel_automl', - 'forecasting_assumptions', - 'forecasting_exogenous'])]) -def strategy_tsc_test(data, strategy): - train_data, test_data = data +def combinations(data, strategy): + return [[d, s] for d in data for s in strategy] + + +def test_federated_clf(): + api_config = dict(problem='classification', + metric='f1', + timeout=5, + n_jobs=2, + industrial_strategy='federated_automl', + industrial_strategy_params={}, + logging_level=20) - industrial = FedotIndustrial(task='classification', - timeout=2, - n_jobs=-1, - industrial_strategy=strategy) + # Huge synthetic dataset for experiment + train_data, test_data = TimeSeriesDatasetsGenerator(num_samples=1800, + task='classification', + max_ts_len=50, + binary=True, + test_size=0.5, + multivariate=False).generate_data() + industrial = FedotIndustrial(**api_config) industrial.fit(train_data) - labels = industrial.predict(test_data) - probs = industrial.predict_proba(test_data) - assert labels is not None - assert probs is not None - assert np.mean(labels) > 0 - assert np.mean(probs) > 0 + predict = industrial.predict(test_data) + + assert predict is not None + + +# ['federated_automl', +# 'kernel_automl',] diff --git a/tests/integration/ts_classification/test_ts_classification_basic.py b/tests/integration/ts_classification/test_ts_classification_basic.py index 91b8e661d..afebdbdae 100644 --- a/tests/integration/ts_classification/test_ts_classification_basic.py +++ b/tests/integration/ts_classification/test_ts_classification_basic.py @@ -5,24 +5,22 @@ from fedot_ind.tools.loader import DataLoader -@pytest.fixture def multi_data(): train_data, test_data = DataLoader(dataset_name='Epilepsy').load_data() return train_data, test_data -@pytest.fixture def uni_data(): train_data, test_data = DataLoader(dataset_name='Lightning7').load_data() return train_data, test_data @pytest.mark.parametrize('data', [multi_data, uni_data]) -def basic_tsc_test(data): - train_data, test_data = data +def test_basic_tsc_test(data): + train_data, test_data = 
data() - industrial = FedotIndustrial(task='classification', - timeout=2, + industrial = FedotIndustrial(problem='classification', + timeout=0.1, n_jobs=-1) industrial.fit(train_data) @@ -30,5 +28,4 @@ def basic_tsc_test(data): probs = industrial.predict_proba(test_data) assert labels is not None assert probs is not None - assert np.mean(labels) > 0 assert np.mean(probs) > 0 diff --git a/tests/integration/ts_forecast/forecast_advanced.py b/tests/integration/ts_forecast/forecast_advanced.py new file mode 100644 index 000000000..0a2017b75 --- /dev/null +++ b/tests/integration/ts_forecast/forecast_advanced.py @@ -0,0 +1,3 @@ +# Expected strategies to check: +# 'forecasting_assumptions', +# 'forecasting_exogenous' diff --git a/tests/unit/api/utils/test_industrial_strategy.py b/tests/unit/api/utils/test_industrial_strategy.py new file mode 100644 index 000000000..9ec71f5bc --- /dev/null +++ b/tests/unit/api/utils/test_industrial_strategy.py @@ -0,0 +1,74 @@ +from fedot_ind.api.utils.industrial_strategy import IndustrialStrategy +import pytest +from fedot_ind.api.main import FedotIndustrial + +from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator + +STRATEGY = ['federated_automl', 'lora_strategy', 'kernel_automl', 'forecasting_assumptions'] + +CONFIGS = {'federated_automl': {'problem': 'classification', + 'metric': 'f1', + 'timeout': 0.1, + 'industrial_strategy': 'federated_automl', + 'industrial_strategy_params': {} + }, + + 'lora_strategy': {'problem': 'classification', + 'metric': 'accuracy', + 'timeout': 0.1, + 'with_tuning': False, + 'industrial_strategy': 'lora_strategy', + 'industrial_strategy_params': {} + }, + + 'kernel_automl': {'problem': 'classification', + 'metric': 'f1', + 'timeout': 0.1, + 'with_tuning': False, + 'industrial_strategy': 'kernel_automl', + 'industrial_strategy_params': {} + }, + + 'forecasting_assumptions': {'problem': 'ts_forecasting', + 'metric': 'rmse', + 'timeout': 0.1, + 'with_tuning': False, + 'industrial_strategy': 'forecasting_assumptions', + 'industrial_strategy_params': {}}, + + # 'forecasting_exogenous': {} + } + + +@pytest.fixture() +def classification_data(): + train_data, test_data = TimeSeriesDatasetsGenerator(num_samples=1800, + task='classification', + max_ts_len=50, + binary=True, + test_size=0.5, + multivariate=False).generate_data() + return train_data, test_data + + +@pytest.mark.parametrize('strategy', STRATEGY) +def test_industrial_strategy(strategy): + cnfg = CONFIGS[strategy] + base = IndustrialStrategy(industrial_strategy_params=None, + industrial_strategy=strategy, + api_config=cnfg, ) + + assert base is not None + + +def test_federated_strategy(classification_data): + train_data, test_data = classification_data + + n_samples = train_data[0].shape[0] + cnfg = CONFIGS['federated_automl'] + industrial = FedotIndustrial(**cnfg) + industrial.fit(train_data) + predict = industrial.predict(test_data) + + assert predict is not None + assert predict.shape[0] == n_samples diff --git a/tests/unit/core/architecture/preprocessing/test_data_convertor.py b/tests/unit/core/architecture/preprocessing/test_data_convertor.py index 91cf296b1..c1b8d7475 100644 --- a/tests/unit/core/architecture/preprocessing/test_data_convertor.py +++ b/tests/unit/core/architecture/preprocessing/test_data_convertor.py @@ -1,7 +1,8 @@ import pytest from fedot.core.data.data import InputData -from fedot_ind.core.architecture.preprocessing.data_convertor import FedotConverter +from fedot_ind.api.utils.data import init_input_data +from 
fedot_ind.core.architecture.preprocessing.data_convertor import FedotConverter, CustomDatasetCLF from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator @@ -17,3 +18,16 @@ def test_fedot_converter(data): converter = FedotConverter(data=train_data) assert isinstance(converter.input_data, InputData) + + +def get_ts_data(task): + ts_generator = TimeSeriesDatasetsGenerator(task=task) + train_data, test_data = ts_generator.generate_data() + return (init_input_data(train_data[0], train_data[1]), + init_input_data(test_data[0], test_data[1])) + + +def test_custom_dataset_clf(): + train, test = get_ts_data(task='classification') + dataset = CustomDatasetCLF(ts=train) + assert len(dataset) == len(train.target) diff --git a/fedot_ind/core/architecture/pipelines/__init__.py b/tests/unit/core/architecture/settings/__init__.py similarity index 100% rename from fedot_ind/core/architecture/pipelines/__init__.py rename to tests/unit/core/architecture/settings/__init__.py diff --git a/tests/unit/core/architecture/settings/test_computational.py b/tests/unit/core/architecture/settings/test_computational.py new file mode 100644 index 000000000..07b5963ec --- /dev/null +++ b/tests/unit/core/architecture/settings/test_computational.py @@ -0,0 +1,18 @@ +from fedot_ind.core.architecture.settings.computational import BackendMethods, global_imports, default_device +import pytest + + +@pytest.mark.parametrize('device_type', ['CUDA', 'cpu']) +def test_backend_methods(device_type): + backend_methods, backend_scipy = BackendMethods(device_type).backend + assert backend_methods is not None + assert backend_scipy is not None + + +def test_global_imports(): + global_imports('scipy') + + +@pytest.mark.parametrize('device_type', ['CUDA', 'cpu', None]) +def test_default_device(device_type): + default_device(device_type) diff --git a/tests/unit/core/architecture/settings/test_pipeline_factory.py b/tests/unit/core/architecture/settings/test_pipeline_factory.py new file mode 100644 index 000000000..9517de871 --- /dev/null +++ b/tests/unit/core/architecture/settings/test_pipeline_factory.py @@ -0,0 +1,24 @@ +from fedot_ind.core.architecture.settings.pipeline_factory import BasisTransformations, FeatureGenerator, MlModel, KernelFeatureGenerator + + +def test_basis_transformations(): + assert BasisTransformations.datadriven is not None + assert BasisTransformations.wavelet is not None + assert BasisTransformations.Fourier is not None + + +def test_feature_generator(): + assert FeatureGenerator.quantile is not None + assert FeatureGenerator.topological is not None + assert FeatureGenerator.recurrence is not None + + +def test_ml_model(): + assert MlModel.functional_pca is not None + assert MlModel.kalman_filter is not None + assert MlModel.sst is not None + + +def test_kernel_feature_generator(): + assert KernelFeatureGenerator.quantile is not None + assert KernelFeatureGenerator.wavelet is not None diff --git a/tests/unit/core/ensemble/test_RankEnsemble.py b/tests/unit/core/ensemble/test_RankEnsemble.py index 5a4ea0330..3258fc18e 100644 --- a/tests/unit/core/ensemble/test_RankEnsemble.py +++ b/tests/unit/core/ensemble/test_RankEnsemble.py @@ -16,17 +16,16 @@ def get_proba_metric_dict(): return proba_dict, metric_dict -# TODO: uncomment when web loading is fixed -# def test_rank_ensemble_umd(get_proba_metric_dict): -# proba_dict, metric_dict = get_proba_metric_dict -# -# ensembler_umd = RankEnsemble(dataset_name='UMD', -# proba_dict=proba_dict, -# metric_dict=metric_dict) -# result = 
ensembler_umd.ensemble() -# -# assert result['Base_metric'] == 0.993 -# assert result['Base_model'] == 'fedot_preset' +def test_rank_ensemble_umd(get_proba_metric_dict): + proba_dict, metric_dict = get_proba_metric_dict + + ensembler_umd = RankEnsemble(dataset_name='UMD', + proba_dict=proba_dict, + metric_dict=metric_dict) + result = ensembler_umd.ensemble() + + assert result['Base_metric'] == 0.993 + assert result['Base_model'] == 'fedot_preset' def test__create_models_rank_dict(get_proba_metric_dict): diff --git a/tests/unit/core/ensemble/test_kernel_ensemble.py b/tests/unit/core/ensemble/test_kernel_ensemble.py index 772e3a9e5..437c8159d 100644 --- a/tests/unit/core/ensemble/test_kernel_ensemble.py +++ b/tests/unit/core/ensemble/test_kernel_ensemble.py @@ -1,19 +1,27 @@ +from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator +from fedot_ind.api.main import FedotIndustrial -def test_kernel_ensemble(): - pass - # dataset_name = 'Car' - # metric_names = ('f1', 'accuracy', 'precision', 'roc_auc') - # api_config = dict(problem='classification', - # metric='f1', - # timeout=0.1, - # n_jobs=2, - # industrial_strategy='kernel_automl', - # industrial_strategy_params={}, - # logging_level=60) - # train_data, test_data = DataLoader(dataset_name).load_data() - # industrial = FedotIndustrial(**api_config) - # industrial.fit(train_data) - # predict = industrial.predict(test_data) - # - # assert predict is not None +def classification_data(): + generator = TimeSeriesDatasetsGenerator(task='classification', + binary=True, + multivariate=False) + train_data, test_data = generator.generate_data() + + return train_data, test_data + + +def kernel_ensemble(): + api_config = dict(problem='classification', + metric='f1', + timeout=0.1, + n_jobs=1, + industrial_strategy='kernel_automl', + industrial_strategy_params={}, + logging_level=60) + industrial = FedotIndustrial(**api_config) + train_data, test_data = classification_data() + industrial.fit(train_data) + predict = industrial.predict(test_data) + + assert predict is not None diff --git a/tests/unit/core/models/model_impl/__init__.py b/tests/unit/core/models/model_impl/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/core/models/test_deepar.py b/tests/unit/core/models/model_impl/test_deepar.py similarity index 94% rename from tests/unit/core/models/test_deepar.py rename to tests/unit/core/models/model_impl/test_deepar.py index 436bf4191..f3b31a923 100644 --- a/tests/unit/core/models/test_deepar.py +++ b/tests/unit/core/models/model_impl/test_deepar.py @@ -93,8 +93,8 @@ def test_losses(ts): -1) == p, f'Predictions should have {p} per index for loss {loss_fn}' -def test_get_initial_state(ts): - for cell_type in ['RNN', "LSTM", 'GRU']: - deepar = DeepAR({'cell_type': cell_type}) - deepar.fit(ts) - deepar.predict(ts) +@pytest.mark.parametrize('cell_type', ['RNN', 'LSTM', 'GRU']) +def test_get_initial_state(ts, cell_type): + deepar = DeepAR({'cell_type': cell_type}) + deepar.fit(ts) + deepar.predict(ts) diff --git a/tests/unit/core/models/model_impl/test_dummy_nn.py b/tests/unit/core/models/model_impl/test_dummy_nn.py new file mode 100644 index 000000000..4dd0fa43f --- /dev/null +++ b/tests/unit/core/models/model_impl/test_dummy_nn.py @@ -0,0 +1,10 @@ +import torch + +from fedot_ind.core.models.nn.network_impl.dummy_nn import DummyOverComplicatedNeuralNetwork + + +def test_dummy_nn(): + dummy = DummyOverComplicatedNeuralNetwork(input_dim=1000, output_dim=10) + image = torch.randn(1, 1000, 1000) 
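+    # forward pass sanity check: the (1, 1000, 1000) random input should be mapped to (1, 10) class logits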
+ output = dummy(image) + assert output.shape == (1, 10) diff --git a/tests/unit/core/models/test_inception.py b/tests/unit/core/models/model_impl/test_inception.py similarity index 100% rename from tests/unit/core/models/test_inception.py rename to tests/unit/core/models/model_impl/test_inception.py diff --git a/tests/unit/core/models/model_impl/test_mini_rocket.py b/tests/unit/core/models/model_impl/test_mini_rocket.py new file mode 100644 index 000000000..8cab87410 --- /dev/null +++ b/tests/unit/core/models/model_impl/test_mini_rocket.py @@ -0,0 +1,43 @@ +import numpy as np +from torch import Tensor + +from fedot_ind.api.utils.data import init_input_data +from fedot_ind.core.models.nn.network_impl.mini_rocket import MiniRocket, MiniRocketExtractor, get_minirocket_features, \ + MiniRocketHead + + +def test_mini_rocket(): + mini_rocket = MiniRocket(input_dim=1000, + output_dim=10, + seq_len=1000, + num_features=10_000) + input_tensor = Tensor(np.random.rand(1, 1000, 1000)) + mini_rocket.fit(input_tensor) + + +def test_get_minirocket_features(): + input_tensor = Tensor(np.random.rand(100, 100, 100)) + model = MiniRocket(input_dim=100, + output_dim=10, + seq_len=100, + num_features=100) + features = get_minirocket_features(data=input_tensor, + model=model, + chunksize=10) + assert features.shape == (100, 10, 1) + + +def test_mini_rocket_head(): + head = MiniRocketHead(input_dim=1000, + output_dim=10) + + assert head(input=Tensor(np.random.rand(2, 1000))).shape == (2, 10) + + +def test_mini_rocket_extractor(): + extractor = MiniRocketExtractor({'num_features': 100}) + input_features = np.random.rand(100, 3, 100) + input_target = np.random.randint(0, 2, 100) + input_data = init_input_data(input_features, input_target) + features = extractor.transform(input_data) + assert features.features.shape == (100, 3, 100) diff --git a/tests/unit/core/models/model_impl/test_nbeats.py b/tests/unit/core/models/model_impl/test_nbeats.py new file mode 100644 index 000000000..cc254ef15 --- /dev/null +++ b/tests/unit/core/models/model_impl/test_nbeats.py @@ -0,0 +1,38 @@ +import numpy as np +import pytest +from fedot.core.data.data import InputData +from fedot.core.data.data_split import train_test_data_setup +from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot.core.repository.dataset_types import DataTypesEnum +from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum + +from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels + + +@pytest.fixture(scope='session') +def ts_input_data(): + horizon = 5 + task = Task(TaskTypesEnum.ts_forecasting, + TsForecastingParams(forecast_length=horizon)) + ts = np.random.rand(100) + train_input = InputData(idx=np.arange(0, len(ts)), + features=ts, + target=ts, + task=task, + data_type=DataTypesEnum.ts) + return train_test_data_setup(train_input, validation_blocks=None) + + +def test_nbeats_model(ts_input_data): + train, test = ts_input_data + + with IndustrialModels(): + model = PipelineBuilder().add_node('nbeats_model', params=dict( + backcast_length=10, + forecast_length=5, + epochs=10 + )).build() + + model.fit(train) + forecast = model.predict(test) + assert len(forecast.predict) == 5 diff --git a/tests/unit/core/models/model_impl/test_omni_scale.py b/tests/unit/core/models/model_impl/test_omni_scale.py new file mode 100644 index 000000000..3cb225909 --- /dev/null +++ b/tests/unit/core/models/model_impl/test_omni_scale.py @@ -0,0 +1,35 @@ +import numpy as np +import pytest +from 
fedot.core.data.data import InputData +from fedot.core.data.data_split import train_test_data_setup +from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot.core.repository.dataset_types import DataTypesEnum +from fedot.core.repository.tasks import Task, TaskTypesEnum + +from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels + + +@pytest.fixture(scope='session') +def ts_input_data(): + task = Task(TaskTypesEnum.classification) + features = np.random.randn(100, 100) + target = np.random.randint(0, 2, 100) + train_input = InputData(idx=np.arange(0, 100), + features=features, + target=target, + task=task, + data_type=DataTypesEnum.table) + return train_test_data_setup(train_input, validation_blocks=None) + + +def test_omniscale_model(ts_input_data): + train, test = ts_input_data + + with IndustrialModels(): + model = PipelineBuilder().add_node('omniscale_model', + params=dict(epochs=10) + ).build() + + model.fit(train) + model.predict(test) + assert model is not None diff --git a/tests/unit/core/models/model_impl/test_patch_tst.py b/tests/unit/core/models/model_impl/test_patch_tst.py new file mode 100644 index 000000000..efca456c1 --- /dev/null +++ b/tests/unit/core/models/model_impl/test_patch_tst.py @@ -0,0 +1,36 @@ +import numpy as np +import pytest +from fedot.core.data.data import InputData +from fedot.core.data.data_split import train_test_data_setup +from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot.core.repository.dataset_types import DataTypesEnum +from fedot.core.repository.tasks import TsForecastingParams, Task, TaskTypesEnum + +from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels + + +@pytest.fixture(scope='session') +def ts_input_data(): + horizon = 5 + task = Task(TaskTypesEnum.ts_forecasting, + TsForecastingParams(forecast_length=horizon)) + ts = np.random.rand(100) + train_input = InputData(idx=np.arange(0, len(ts)), + features=ts, + target=ts, + task=task, + data_type=DataTypesEnum.ts) + return train_test_data_setup(train_input, validation_blocks=None) + + +def test_patch_tst_model(ts_input_data): + train, test = ts_input_data + + with IndustrialModels(): + model = PipelineBuilder().add_node('patch_tst_model', params=dict( + epochs=10 + )).build() + + model.fit(train) + forecast = model.predict(test) + assert len(forecast.predict) == 5 diff --git a/tests/unit/core/models/test_resnet.py b/tests/unit/core/models/model_impl/test_resnet.py similarity index 100% rename from tests/unit/core/models/test_resnet.py rename to tests/unit/core/models/model_impl/test_resnet.py diff --git a/tests/unit/core/models/test_tcn.py b/tests/unit/core/models/model_impl/test_tcn.py similarity index 100% rename from tests/unit/core/models/test_tcn.py rename to tests/unit/core/models/model_impl/test_tcn.py diff --git a/tests/unit/core/models/model_impl/test_transformer.py b/tests/unit/core/models/model_impl/test_transformer.py new file mode 100644 index 000000000..40f4a4d3b --- /dev/null +++ b/tests/unit/core/models/model_impl/test_transformer.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest +from fedot.core.operations.operation_parameters import OperationParameters + +from fedot_ind.api.utils.data import init_input_data +from fedot_ind.core.models.nn.network_impl.transformer import TransformerModel + + +@pytest.fixture(scope='session') +def input_data(): + features = np.random.rand(10, 4, 3) + target = np.random.randint(0, 2, 10) + return init_input_data(features, target) + + +def
test_transformer_model(input_data): + model = TransformerModel(params=OperationParameters(num_classes=2, + epochs=5, + batch_size=32) + ) + + model.fit(input_data) + pred = model._predict_model(input_data.features) + + assert model is not None + assert pred.predict.shape[0] == input_data.features.shape[0] diff --git a/tests/unit/core/models/model_impl/test_xc_model.py b/tests/unit/core/models/model_impl/test_xc_model.py new file mode 100644 index 000000000..165548dd8 --- /dev/null +++ b/tests/unit/core/models/model_impl/test_xc_model.py @@ -0,0 +1,37 @@ +import warnings + +import numpy as np +import pytest +from fedot.core.operations.operation_parameters import OperationParameters +from matplotlib import get_backend, pyplot as plt + +from fedot_ind.api.utils.data import init_input_data +from fedot_ind.core.models.nn.network_impl.explainable_convolution_model import XCModel + + +@pytest.fixture(scope='session') +def input_data(): + features_train, features_test = np.random.randn(100, 3, 50, 50), np.random.randn(100, 3, 50, 50) + target_train, target_test = np.random.randint(0, 2, 100), np.random.randint(0, 2, 100) + train_input = init_input_data(features_train, target_train) + test_input = init_input_data(features_test, target_test) + return train_input, test_input + + +def test_xcm_model(input_data): + # switch to non-Gui, preventing plots being displayed + # suppress UserWarning that agg cannot show plots + get_backend() + plt.switch_backend("Agg") + warnings.filterwarnings("ignore", "Matplotlib is currently using agg") + train, test = input_data + model = XCModel(params=OperationParameters(num_classes=2, + epochs=10, + batch_size=64)) + model._fit_model(train) + predict = model._predict_model(test.features) + + model.model_for_inference.explain(train) + + assert model is not None + assert predict.predict.shape[0] == test.target.shape[0] diff --git a/tests/unit/core/models/network_modules/__init__.py b/tests/unit/core/models/network_modules/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/core/models/network_modules/test_losses.py b/tests/unit/core/models/network_modules/test_losses.py new file mode 100644 index 000000000..a122d50b9 --- /dev/null +++ b/tests/unit/core/models/network_modules/test_losses.py @@ -0,0 +1,218 @@ +import torch +import torch.nn as nn +from fedot_ind.core.models.nn.network_modules.losses import lambda_prepare, ExpWeightedLoss, HuberLoss, LogCoshLoss, MaskedLossWrapper, CenterLoss, FocalLoss, TweedieLoss, SMAPELoss, RMSELoss + + +def test_lambda_prepare_int(): + val = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + lambda_ = 5 + result = lambda_prepare(val, lambda_) + assert torch.allclose(result, torch.tensor([[5.0, 5.0]])) + + +def test_lambda_prepare_list(): + val = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + lambda_ = [1, 2] + result = lambda_prepare(val, lambda_) + assert torch.allclose(result, torch.tensor([[1.0, 2.0]])) + + +def test_lambda_prepare_tensor(): + val = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + lambda_ = torch.tensor([3.0, 4.0]) + result = lambda_prepare(val, lambda_) + assert torch.allclose(result, torch.tensor([3.0, 4.0])) + + +def test_exp_weighted_loss(): + time_steps = 5 + tolerance = 0.1 + loss_fn = ExpWeightedLoss(time_steps, tolerance) + + input_ = torch.tensor( + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [9.0, 10.0]]) + target = torch.tensor( + [[1.5, 2.5], [3.5, 4.5], [5.5, 6.5], [7.5, 8.5], [9.5, 10.5]]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(1.357), atol=1e-3) + + 
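+# Worked arithmetic for the reduction tests below (assuming the standard Huber and
+# log-cosh definitions): every residual is |input - target| = 0.5 <= delta=1.0, so the
+# quadratic Huber branch applies and each element contributes 0.5 * 0.5 ** 2 = 0.125
+# (mean -> 0.125, sum -> 3 * 0.125 = 0.375); likewise log(cosh(0.5)) ~= 0.120 per
+# element, which matches the 0.12 / 0.36 expectations in the log-cosh tests.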
+def test_huber_loss_mean(): + loss_fn = HuberLoss(reduction='mean', delta=1.0) + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(0.125)) + + +def test_huber_loss_sum(): + loss_fn = HuberLoss(reduction='sum', delta=1.0) + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(0.375)) + + +def test_huber_loss_none(): + loss_fn = HuberLoss(reduction='none', delta=1.0) + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.allclose(loss, torch.tensor([0.125])) + + +def test_log_cosh_loss_mean(): + loss_fn = LogCoshLoss(reduction='mean', delta=1.0) + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(0.12), atol=1e-3) + + +def test_log_cosh_loss_sum(): + loss_fn = LogCoshLoss(reduction='sum', delta=1.0) + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(0.36), atol=1e-3) + + +def test_log_cosh_loss_none(): + loss_fn = LogCoshLoss(reduction='none', delta=1.0) + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.allclose(loss, torch.tensor([0.12]), atol=1e-3) + + +def test_masked_loss_wrapper(): + loss_fn = nn.MSELoss() + masked_loss_fn = MaskedLossWrapper(loss_fn) + + input_ = torch.tensor([[1.0, 2.0], [3.0, 4.0]]) + target = torch.tensor([[1.5, float('nan')], [3.5, 4.5]]) + + loss = masked_loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(0.25)) + + +def test_center_loss(): + c_out = 3 + logits_dim = 2 + loss_fn = CenterLoss(c_out, logits_dim) + + x = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) + labels = torch.tensor([0, 1, 2]) + + loss = loss_fn(x, labels) + assert torch.isclose(loss, torch.tensor(39.24), atol=100) + + +# def test_center_plus_loss(): +# c_out = 3 +# logits_dim = 2 +# loss_fn = nn.CrossEntropyLoss() +# center_plus_loss_fn = CenterPlusLoss( +# loss_fn, c_out, λ=0.1, logits_dim=logits_dim) +# +# x = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) +# labels = torch.tensor([0, 1, 2]) +# +# loss = center_plus_loss_fn(x, labels) +# assert torch.isclose(loss, torch.tensor(3.3133), atol=1e-4) + + +def test_focal_loss_mean(): + alpha = torch.tensor([0.25, 0.75]) + gamma = 2.0 + reduction = 'mean' + loss_fn = FocalLoss(alpha, gamma, reduction) + + x = torch.tensor([[0.8, 0.2], [0.3, 0.7]]) + y = torch.tensor([0, 1]) + + loss = loss_fn(x, y) + assert torch.isclose(loss, torch.tensor(0.037), atol=1e-3) + + +def test_focal_loss_sum(): + alpha = None + gamma = 1.5 + reduction = 'sum' + loss_fn = FocalLoss(alpha, gamma, reduction) + + x = torch.tensor([[0.8, 0.2], [0.3, 0.7]]) + y = torch.tensor([0, 1]) + + loss = loss_fn(x, y) + assert torch.isclose(loss, torch.tensor(0.222), atol=1e-3) + + +def test_focal_loss_none(): + alpha = torch.tensor([0.5, 0.5]) + gamma = 1.0 + reduction = 'none' + loss_fn = FocalLoss(alpha, gamma, reduction) + + x = torch.tensor([[0.8, 0.2], [0.3, 0.7]]) + y = torch.tensor([0, 1]) + + loss = loss_fn(x, y) + assert torch.allclose(loss, torch.tensor([0.077, 0.102]), atol=1e-2) + + +def test_tweedie_loss_p1_5(): + p = 1.5 + eps = 1e-8 + loss_fn = 
TweedieLoss(p, eps) + + inp = torch.tensor([1.0, 2.0, 3.0]) + targ = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(inp, targ) + assert torch.isclose(loss, torch.tensor(6.289), atol=1e-3) + + +def test_tweedie_loss_p1_8(): + p = 1.8 + eps = 1e-8 + loss_fn = TweedieLoss(p, eps) + + inp = torch.tensor([1.0, 2.0, 3.0]) + targ = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(inp, targ) + assert torch.isclose(loss, torch.tensor(7.486), atol=1e-3) + + +def test_smape_loss(): + loss_fn = SMAPELoss() + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(25.868), atol=1e-3) + + +def test_rmse_loss(): + loss_fn = RMSELoss() + + input_ = torch.tensor([1.0, 2.0, 3.0]) + target = torch.tensor([1.5, 2.5, 3.5]) + + loss = loss_fn(input_, target) + assert torch.isclose(loss, torch.tensor(0.5)) diff --git a/tests/unit/core/models/network_modules/test_other.py b/tests/unit/core/models/network_modules/test_other.py new file mode 100644 index 000000000..03c9c1c0d --- /dev/null +++ b/tests/unit/core/models/network_modules/test_other.py @@ -0,0 +1,24 @@ +import numpy as np +from torch import Tensor, nn + +from fedot_ind.core.models.nn.network_modules.other import correct_sizes, pass_through, if_module_to_torchscript + + +def test_correct_sizes(): + corrected = correct_sizes(sizes=(4, 1, 8)) + assert corrected == [3, 1, 7] + + +def test_pass_through(): + tensor = Tensor([1, 2, 3]) + assert pass_through(tensor) is tensor + + +def test_if_module_to_torchscript(): + module = nn.Linear(4, 11) + tensor = Tensor(np.random.rand(11, 11, 4)) + assert if_module_to_torchscript(m=module, + inputs=tensor, + script=True, + verbose=True, + serialize=True) diff --git a/tests/unit/core/models/test_fedot_implementation.py b/tests/unit/core/models/test_fedot_implementation.py deleted file mode 100644 index 6c80d427a..000000000 --- a/tests/unit/core/models/test_fedot_implementation.py +++ /dev/null @@ -1,10 +0,0 @@ -# -# -# def test_fedot_classification_implementation(): -# model = FedotClassificationImplementation() -# assert model is not None -# -# -# def test_fedot_regression_implementation(): -# model = FedotRegressionImplementation() -# assert model is not None diff --git a/tests/unit/core/models/test_quantile_extractor.py b/tests/unit/core/models/test_quantile_extractor.py index c39fb77ad..7a04ec58f 100644 --- a/tests/unit/core/models/test_quantile_extractor.py +++ b/tests/unit/core/models/test_quantile_extractor.py @@ -19,9 +19,9 @@ def dataset(n_classes): @pytest.fixture def input_data(): - N_CLASSES = np.random.choice([2, 3]) - X_train, y_train, X_test, y_test = dataset(N_CLASSES) - input_train_data = init_input_data(X_train, y_train) + n_classes = np.random.choice([2, 3]) + x_train, y_train, x_test, y_test = dataset(n_classes) + input_train_data = init_input_data(x_train, y_train) return input_train_data @@ -43,14 +43,9 @@ def test_transform(quantile_extractor, input_data): def test_transform_window(quantile_extractor_window, input_data): - train_features_window = quantile_extractor_window.transform( - input_data=input_data) - quantile_extractor_window.window_size - input_data.features.shape[1] - # expected_n_features = len(stat_methods_global.keys()) + math.ceil(len_ts / (len_ts*window/100)) * len(stat_methods.keys()) + train_features_window = quantile_extractor_window.transform(input_data=input_data) assert train_features_window is not None assert isinstance(train_features_window, OutputData) - # assert
diff --git a/tests/unit/core/models/test_fedot_implementation.py b/tests/unit/core/models/test_fedot_implementation.py
deleted file mode 100644
index 6c80d427a..000000000
--- a/tests/unit/core/models/test_fedot_implementation.py
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-#
-# def test_fedot_classification_implementation():
-#     model = FedotClassificationImplementation()
-#     assert model is not None
-#
-#
-# def test_fedot_regression_implementation():
-#     model = FedotRegressionImplementation()
-#     assert model is not None
diff --git a/tests/unit/core/models/test_quantile_extractor.py b/tests/unit/core/models/test_quantile_extractor.py
index c39fb77ad..7a04ec58f 100644
--- a/tests/unit/core/models/test_quantile_extractor.py
+++ b/tests/unit/core/models/test_quantile_extractor.py
@@ -19,9 +19,9 @@ def dataset(n_classes):
 
 @pytest.fixture
 def input_data():
-    N_CLASSES = np.random.choice([2, 3])
-    X_train, y_train, X_test, y_test = dataset(N_CLASSES)
-    input_train_data = init_input_data(X_train, y_train)
+    n_classes = np.random.choice([2, 3])
+    x_train, y_train, x_test, y_test = dataset(n_classes)
+    input_train_data = init_input_data(x_train, y_train)
     return input_train_data
 
 
@@ -43,14 +43,9 @@ def test_transform(quantile_extractor, input_data):
 
 
 def test_transform_window(quantile_extractor_window, input_data):
-    train_features_window = quantile_extractor_window.transform(
-        input_data=input_data)
-    quantile_extractor_window.window_size
-    input_data.features.shape[1]
-    # expected_n_features = len(stat_methods_global.keys()) + math.ceil(len_ts / (len_ts*window/100)) * len(stat_methods.keys())
+    train_features_window = quantile_extractor_window.transform(input_data=input_data)
    assert train_features_window is not None
    assert isinstance(train_features_window, OutputData)
-    # assert expected_n_features == train_features_window.predict.shape[1]
 
 
 def test_extract_features(quantile_extractor):
diff --git a/tests/unit/core/models/test_riemann_embeding.py b/tests/unit/core/models/test_riemann_embeding.py
new file mode 100644
index 000000000..e96aac7b5
--- /dev/null
+++ b/tests/unit/core/models/test_riemann_embeding.py
@@ -0,0 +1,96 @@
+import json
+
+import numpy as np
+import pytest
+from fedot.core.data.data import OutputData
+
+from fedot_ind.api.utils.data import init_input_data
+from fedot_ind.api.utils.path_lib import PATH_TO_DEFAULT_PARAMS
+from fedot_ind.core.models.manifold.riemann_embeding import RiemannExtractor
+from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator
+
+
+@pytest.fixture(scope='session')
+def input_data():
+    (X_train, y_train), (X_test, y_test) = TimeSeriesDatasetsGenerator(num_samples=20,
+                                                                       max_ts_len=50,
+                                                                       binary=True,
+                                                                       multivariate=True,
+                                                                       test_size=0.5).generate_data()
+    return init_input_data(X_train, y_train), init_input_data(X_test, y_test)
+
+
+@pytest.fixture(scope='session')
+def default_params():
+    with open(PATH_TO_DEFAULT_PARAMS, 'r') as file:
+        default_params = json.load(file)['riemann_extractor']
+    return default_params
+
+
+def test__init_riemann_extractor(default_params):
+    riemann_extractor = RiemannExtractor(default_params)
+    assert riemann_extractor is not None
+
+
+@pytest.mark.parametrize('what_is_none', ('SPD_space', 'tangent_space', 'both'))
+def test__init_spaces(default_params, what_is_none):
+    if what_is_none == 'both':
+        default_params['SPD_space'] = None
+        default_params['tangent_space'] = None
+    else:
+        default_params[what_is_none] = None
+
+    riemann_extractor = RiemannExtractor(default_params)
+    assert riemann_extractor.spd_space is not None
+    assert riemann_extractor.tangent_space is not None
+    assert riemann_extractor.shrinkage is not None
+
+
+@pytest.mark.parametrize('fit_stage', (True, False))
+def test_extract_riemann_features(input_data, default_params, fit_stage):
+    riemann_extractor = RiemannExtractor(default_params)
+    riemann_extractor.fit_stage = fit_stage
+    train, test = input_data
+    features = riemann_extractor.extract_riemann_features(train)
+    assert features is not None
+    assert isinstance(features, np.ndarray)
+
+
+@pytest.mark.parametrize('fit_stage', (True, False))
+def test_extract_centroid_distance(input_data, default_params, fit_stage):
+    riemann_extractor = RiemannExtractor(default_params)
+    riemann_extractor.fit_stage = fit_stage
+    train, test = input_data
+    riemann_extractor.classes_ = np.unique(train.target)
+    features = riemann_extractor.extract_centroid_distance(train)
+    assert features is not None
+    assert isinstance(features, np.ndarray)
+
+
+@pytest.mark.parametrize('extraction_strategy', ('mdm', 'tangent', 'ensemble'))
+def test__init_extraction_func(default_params, extraction_strategy):
+    default_params['extraction_strategy'] = extraction_strategy
+    riemann_extractor = RiemannExtractor(default_params)
+    assert riemann_extractor.extraction_func is not None
+
+
+@pytest.mark.parametrize('fit_stage', (True, False))
+def test__ensemble_features(input_data, default_params, fit_stage):
+    riemann_extractor = RiemannExtractor(default_params)
+    riemann_extractor.fit_stage = fit_stage
+    train, test = input_data
+    riemann_extractor.classes_ = np.unique(train.target)
+    features = riemann_extractor._ensemble_features(train)
+    assert features is not None
+    assert isinstance(features, np.ndarray)
+
+
+@pytest.mark.parametrize('fit_stage', (True, False))
+def test__transform(input_data, default_params, fit_stage):
+    riemann_extractor = RiemannExtractor(default_params)
+    riemann_extractor.fit_stage = fit_stage
+    train, test = input_data
+    riemann_extractor.classes_ = np.unique(train.target)
+    transformed = riemann_extractor.transform(train)
+    assert transformed is not None
+    assert isinstance(transformed, OutputData)
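Background for these fixtures: a Riemann extractor of this kind maps each multivariate sample to an SPD covariance matrix and then flattens it to a tangent-space vector. A minimal sketch of that pipeline with pyriemann (assumed here to be the backing library; the estimator and metric choices are illustrative, not fedot_ind's exact defaults):

    import numpy as np
    from pyriemann.estimation import Covariances
    from pyriemann.tangentspace import TangentSpace

    # X: (n_samples, n_channels, n_timepoints), matching the generator above
    X = np.random.randn(20, 3, 50)

    spd = Covariances(estimator='scm').fit_transform(X)            # (20, 3, 3) SPD matrices
    features = TangentSpace(metric='riemann').fit_transform(spd)   # flat vectors
    assert features.shape == (20, 3 * (3 + 1) // 2)                # n*(n+1)/2 per sample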
diff --git a/tests/unit/core/models/test_ssa.py b/tests/unit/core/models/test_ssa.py
index e63a97f4b..6561b5fa3 100644
--- a/tests/unit/core/models/test_ssa.py
+++ b/tests/unit/core/models/test_ssa.py
@@ -1,18 +1,26 @@
+import numpy as np
+import pytest
+from fedot.core.data.data import InputData
+from fedot.core.repository.dataset_types import DataTypesEnum
+from fedot.core.repository.tasks import Task, TaskTypesEnum
+from fedot_ind.core.models.ts_forecasting.ssa_forecaster import SSAForecasterImplementation
 
-def test_ssa():
-    pass
-    # time_series = np.random.normal(size=30)
-    # task = Task(TaskTypesEnum.ts_forecasting,
-    #             TsForecastingParams(forecast_length=1))
-    # train_input = InputData(idx=np.arange(time_series.shape[0]),
-    #                         features=time_series,
-    #                         target=time_series,
-    #                         task=task,
-    #                         data_type=DataTypesEnum.ts)
-    # train_data, test_data = train_test_data_setup(train_input)
-    #
-    # with IndustrialModels():
-    #     pipeline = PipelineBuilder().add_node('ssa_forecaster').build()
-    #     pipeline.fit(train_data)
-    #     assert pipeline is not None
+
+@pytest.fixture(scope='session')
+def time_series_data():
+    ts = np.random.rand(100)
+    input_data = InputData(idx=np.arange(0, len(ts)),
+                           features=ts.reshape(-1, 1),
+                           target=ts,
+                           task=Task(TaskTypesEnum.ts_forecasting),
+                           data_type=DataTypesEnum.ts)
+    return input_data
+
+
+def test_predict_for_fit(time_series_data):
+    forecaster = SSAForecasterImplementation({'mode': 'one_dimensional'})
+    forecaster.horizon = 10
+    forecast = forecaster.predict_for_fit(time_series_data)
+
+    assert forecast is not None
+    assert forecast.shape[1] == 100
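For orientation, the model under test is singular spectrum analysis: embed the series into a Hankel trajectory matrix, take an SVD, keep the leading singular triples, and average anti-diagonals to map the low-rank matrix back to a series. A compact NumPy illustration of the idea, independent of fedot_ind's implementation:

    import numpy as np

    ts = np.sin(np.linspace(0, 8 * np.pi, 100)) + 0.1 * np.random.randn(100)
    window = 20

    # Hankel (trajectory) matrix of lagged windows
    traj = np.column_stack([ts[i:i + window]
                            for i in range(len(ts) - window + 1)])

    # rank-2 reconstruction: a noisy sine is captured by two singular triples
    u, s, vt = np.linalg.svd(traj, full_matrices=False)
    low_rank = (u[:, :2] * s[:2]) @ vt[:2]

    # diagonal averaging (Hankelisation) recovers a smoothed series
    smoothed = np.array([low_rank[::-1].diagonal(k).mean()
                         for k in range(-window + 1, low_rank.shape[1])])
    assert smoothed.shape == ts.shape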
diff --git a/tests/unit/core/operation/interfaces/test_fedot_automl_evaluation_strategy.py b/tests/unit/core/operation/interfaces/test_fedot_automl_evaluation_strategy.py
index 8657c9909..9ce36954c 100644
--- a/tests/unit/core/operation/interfaces/test_fedot_automl_evaluation_strategy.py
+++ b/tests/unit/core/operation/interfaces/test_fedot_automl_evaluation_strategy.py
@@ -1,62 +1,72 @@
-import numpy as np
-import pandas as pd
 import pytest
 from fedot.core.operations.operation_parameters import OperationParameters
-
+from fedot_ind.tools.synthetic.ts_datasets_generator import TimeSeriesDatasetsGenerator
 from fedot_ind.api.utils.data import init_input_data
 from fedot_ind.core.operation.interfaces.fedot_automl_evaluation_strategy import FedotAutoMLClassificationStrategy, \
     FedotAutoMLRegressionStrategy
 
 
 @pytest.fixture()
-def regression_data():
-    features = np.random.rand(10, 10)
-    target = np.random.rand(10, 1)
+def classification_data():
+    generator = TimeSeriesDatasetsGenerator(task='classification',
+                                            binary=True,
+                                            multivariate=False)
+    train_data, _ = generator.generate_data()
+
     return init_input_data(
-        X=pd.DataFrame(features),
-        y=target,
-        task='regression')
+        train_data[0],
+        train_data[1],
+        task='classification')
 
 
 @pytest.fixture()
-def classification_data():
-    features = np.random.rand(10, 10)
-    target = np.random.randint(2, size=10)
+def regression_data():
+    generator = TimeSeriesDatasetsGenerator(task='regression',
+                                            binary=True,
+                                            multivariate=False)
+    train_data, _ = generator.generate_data()
+
     return init_input_data(
-        X=pd.DataFrame(features),
-        y=target,
-        task='classification')
+        train_data[0],
+        train_data[1].reshape(-1, 1),
+        task='regression')
 
 
-def test_fedot_automl_classification_strategy_fit(classification_data):
+# no 'test_' prefix: excluded from pytest collection (long-running AutoML fit)
+def fedot_automl_classification_strategy_fit(classification_data):
     operation_type = 'fedot_cls'
     params = OperationParameters()
-    params._parameters.update({'problem': 'classification', 'timeout': 0.5})
+    params._parameters.update(
+        {'problem': 'classification', 'timeout': 0.1, 'n_jobs': 1})
     strategy = FedotAutoMLClassificationStrategy(operation_type=operation_type,
                                                  params=params)
-    # trained_operation = strategy.fit(classification_data)
+    trained_operation = strategy.fit(classification_data)
 
-    # predict = strategy.predict(trained_operation, classification_data)
-    # predict_for_fit = strategy.predict_for_fit(trained_operation, classification_data)
+    predict = strategy.predict(trained_operation, classification_data)
+    predict_for_fit = strategy.predict_for_fit(
+        trained_operation, classification_data)
 
-    # assert predict.predict is not None
-    # assert predict_for_fit.predict is not None
+    assert predict.predict is not None
+    assert predict_for_fit.predict is not None
     assert strategy.operation_impl is not None
-    # assert trained_operation is not None
+    assert trained_operation is not None
 
 
-def test_fedot_automl_regression_strategy_fit(regression_data):
+# no 'test_' prefix: excluded from pytest collection (long-running AutoML fit)
+def fedot_automl_regression_strategy_fit(regression_data):
     operation_type = 'fedot_regr'
     params = OperationParameters()
-    params._parameters.update({'problem': 'regression', 'timeout': 0.1})
+    params._parameters.update(
+        {'problem': 'regression', 'timeout': 0.1, 'n_jobs': 1})
     strategy = FedotAutoMLRegressionStrategy(operation_type=operation_type,
                                              params=params)
-    # trained_operation = strategy.fit(regression_data)
-    #
-    # predict = strategy.predict(trained_operation, regression_data)
-    # predict_for_fit = strategy.predict_for_fit(trained_operation, regression_data)
+    trained_operation = strategy.fit(regression_data)
+
+    predict = strategy.predict(trained_operation, regression_data)
+    predict_for_fit = strategy.predict_for_fit(
+        trained_operation, regression_data)
 
-    # assert predict.predict is not None
-    # assert predict_for_fit.predict is not None
+    assert predict.predict is not None
+    assert predict_for_fit.predict is not None
     assert strategy.operation_impl is not None
-    # assert trained_operation is not None
+    assert trained_operation is not None
diff --git a/tests/unit/core/operation/transformation/data/test_point_cloud.py b/tests/unit/core/operation/transformation/data/test_point_cloud.py
index d43a89aed..0cd852127 100644
--- a/tests/unit/core/operation/transformation/data/test_point_cloud.py
+++ b/tests/unit/core/operation/transformation/data/test_point_cloud.py
@@ -23,13 +23,14 @@ def test_TSTransformer(basic_periodic_data):
     assert result.shape[0] > 0 and result.shape[1] > 0
 
 
-# def test_TopologicalTransformation_time_series_rolling_betti_ripser(basic_periodic_data):
-#     topological_transformer = TopologicalTransformation(
-#         time_series=basic_periodic_data,
-#         max_simplex_dim=1,
-#         epsilon=3,
-#         window_length=400)
-#     assert len(topological_transformer.time_series_rolling_betti_ripser(basic_periodic_data)) != 0
+def test_TopologicalTransformation_time_series_rolling_betti_ripser(
+        basic_periodic_data):
+    topological_transformer = 
TopologicalTransformation( + time_series=basic_periodic_data, + max_simplex_dim=1, + epsilon=3, + window_length=400) + assert len(topological_transformer.time_series_rolling_betti_ripser(basic_periodic_data)) != 0 def test_TopologicalTransformation_time_series_to_point_cloud( diff --git a/tests/unit/models/classification_pipelines.py b/tests/unit/models/classification_pipelines.py deleted file mode 100644 index 29f8f95a3..000000000 --- a/tests/unit/models/classification_pipelines.py +++ /dev/null @@ -1,122 +0,0 @@ -from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline -from fedot_ind.core.repository.constanst_repository import VALID_LINEAR_CLF_PIPELINE - -UNI_MULTI_CLF = ['Earthquakes', 'ERing'] -TASK = 'classification' - - -def test_quantile_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['statistical_clf'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_quantile_lgbm_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['statistical_lgbm'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_riemann_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['riemann_clf'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_recurrence_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['recurrence_clf'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_wavelet_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['wavelet_statistical'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_fourier_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['fourier_statistical'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_eigen_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['eigen_statistical'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_channel_filtration_clf( - node_list=VALID_LINEAR_CLF_PIPELINE['channel_filtration_statistical'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None - - -def test_composite_clf_pipeline( - node_list=VALID_LINEAR_CLF_PIPELINE['composite_clf'], - data_list=None): - if data_list is None: - data_list = UNI_MULTI_CLF - result = [ - AbstractPipeline( - task=TASK).evaluate_pipeline( - node_list, - data) for data in data_list] - assert result is not None diff --git a/tests/unit/models/forecasting_pipelines.py b/tests/unit/models/forecasting_pipelines.py index 
e60bee863..150c5b635 100644
--- a/tests/unit/models/forecasting_pipelines.py
+++ b/tests/unit/models/forecasting_pipelines.py
@@ -1,4 +1,3 @@
-
 from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline
 from fedot_ind.core.repository.constanst_repository import VALID_LINEAR_TSF_PIPELINE
diff --git a/tests/unit/models/test_classification_pipelines.py b/tests/unit/models/test_classification_pipelines.py
new file mode 100644
index 000000000..2256cb26b
--- /dev/null
+++ b/tests/unit/models/test_classification_pipelines.py
@@ -0,0 +1,15 @@
+import pytest
+
+from fedot_ind.core.architecture.pipelines.abstract_pipeline import AbstractPipeline
+from fedot_ind.core.repository.constanst_repository import VALID_LINEAR_CLF_PIPELINE
+
+UNI_MULTI_CLF = ['Earthquakes', 'ERing']
+TASK = 'classification'
+
+
+@pytest.mark.parametrize('node_list', [VALID_LINEAR_CLF_PIPELINE['statistical_clf']])
+def test_clf(node_list, data_list=None):
+    if data_list is None:
+        data_list = UNI_MULTI_CLF
+    results = [AbstractPipeline(task=TASK).evaluate_pipeline(node_list, data) for data in data_list]
+    assert all(result is not None for result in results)
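The nine near-identical functions from the deleted module collapse into this single parametrized test. Restoring the old coverage is then a one-line change, widening the parameter list with the keys taken from the deleted file (assuming they are all still present in VALID_LINEAR_CLF_PIPELINE; whether each belongs in CI is a separate decision):

    PIPELINE_KEYS = ['statistical_clf', 'statistical_lgbm', 'riemann_clf',
                     'recurrence_clf', 'wavelet_statistical',
                     'fourier_statistical', 'eigen_statistical',
                     'channel_filtration_statistical', 'composite_clf']

    @pytest.mark.parametrize('node_list',
                             [VALID_LINEAR_CLF_PIPELINE[k] for k in PIPELINE_KEYS],
                             ids=PIPELINE_KEYS)
    def test_clf(node_list, data_list=None):
        if data_list is None:
            data_list = UNI_MULTI_CLF
        results = [AbstractPipeline(task=TASK).evaluate_pipeline(node_list, data)
                   for data in data_list]
        assert all(result is not None for result in results)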
diff --git a/tests/unit/tools/test_example_utils.py b/tests/unit/tools/test_example_utils.py
index c251833c9..30013c8b9 100644
--- a/tests/unit/tools/test_example_utils.py
+++ b/tests/unit/tools/test_example_utils.py
@@ -1,4 +1,20 @@
 from fedot_ind.tools.example_utils import evaluate_metric
+from fedot_ind.tools.example_utils import get_ts_data
+import pytest
+
+
+@pytest.mark.parametrize('group',
+                         ['Daily',
+                          'Weekly',
+                          'Monthly',
+                          'Quarterly',
+                          'Yearly'])
+def test_get_ts_data(group):
+    train_data, test_data, label = get_ts_data(dataset=f'M4_{group}',
+                                               horizon=30,
+                                               m4_id=None)
+    assert train_data is not None
+    assert test_data is not None
 
 
 def test_evaluate_metric():
diff --git a/tests/unit/tools/test_load_data.py b/tests/unit/tools/test_load_data.py
index ed182910c..c56ac67e7 100644
--- a/tests/unit/tools/test_load_data.py
+++ b/tests/unit/tools/test_load_data.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 from fedot_ind.api.utils.path_lib import PROJECT_PATH
 from fedot_ind.tools.loader import DataLoader
@@ -19,64 +20,28 @@ def test_init_loader():
 
 
 def test_load_multivariate_data():
-    # TODO: get back to loading from web when it is fixed
-    # train_data, test_data = DataLoader('Epilepsy').load_data()
-
-    # delete when loading from web is fixed
-    path_folder = os.path.join(PROJECT_PATH, 'tests', 'data', 'datasets')
-    train_data, test_data = DataLoader(
-        'Blink', folder=path_folder).load_data()  # remove folder=path_folder also
-    # TODO: get back to loading from web when it is fixed
-    # train_data, test_data = DataLoader('Epilepsy').load_data()
-
-    # delete when loading from web is fixed
-    path_folder = os.path.join(PROJECT_PATH, 'tests', 'data', 'datasets')
-    train_data, test_data = DataLoader(
-        'Blink', folder=path_folder).load_data()  # remove folder=path_folder also
+    train_data, test_data = DataLoader('Epilepsy').load_data()
     x_train, y_train = train_data
     x_test, y_test = test_data
-    assert x_train.shape == (500, 4, 510)
-    assert x_test.shape == (450, 4, 510)
-    assert y_train.shape == (500,)
-    assert y_test.shape == (450,)
-    assert x_train.shape == (500, 4, 510)
-    assert x_test.shape == (450, 4, 510)
-    assert y_train.shape == (500,)
-    assert y_test.shape == (450,)
+    assert x_train.shape == (137, 3, 206)
+    assert x_test.shape == (138, 3, 206)
+    assert y_train.shape == (137,)
+    assert y_test.shape == (138,)
+
+
+def test_load_fake_data():
+    with pytest.raises(FileNotFoundError):
+        DataLoader('Fake').load_data()
 
 
 def test_load_univariate_data():
-    # train_data, test_data = DataLoader('DodgerLoopDay').load_data()
-
-    # delete when loading from web is fixed
-    path_folder = os.path.join(PROJECT_PATH, 'tests', 'data', 'datasets')
-    train_data, test_data = DataLoader('ItalyPowerDemand_tsv',  # change to 'DodgerLoopDay' and adjust shapes below
-                                       folder=path_folder).load_data()  # remove folder=path_folder also
-    # train_data, test_data = DataLoader('DodgerLoopDay').load_data()
-
-    # delete when loading from web is fixed
-    path_folder = os.path.join(PROJECT_PATH, 'tests', 'data', 'datasets')
-    train_data, test_data = DataLoader('ItalyPowerDemand_tsv',  # change to 'DodgerLoopDay' and adjust shapes below
-                                       folder=path_folder).load_data()  # remove folder=path_folder also
+    train_data, test_data = DataLoader('DodgerLoopDay').load_data()
     x_train, y_train = train_data
     x_test, y_test = test_data
-    assert x_train.shape == (67, 24)
-    assert x_test.shape == (67, 24)
-    assert y_train.shape == (67,)
-    assert y_test.shape == (67,)
-    assert x_train.shape == (67, 24)
-    assert x_test.shape == (67, 24)
-    assert y_train.shape == (67,)
-    assert y_test.shape == (67,)
+    assert x_train.shape == (78, 288)
+    assert x_test.shape == (80, 288)
+    assert y_train.shape == (78,)
+    assert y_test.shape == (80,)
-
-# TODO: uncomment when loading from web is fixed
-# def test_load_fake_data():
-#     with pytest.raises(FileNotFoundError):
-#         DataLoader('Fake').load_data()
-# TODO: uncomment when loading from web is fixed
-# def test_load_fake_data():
-#     with pytest.raises(FileNotFoundError):
-#         DataLoader('Fake').load_data()
 
 
 def test__load_from_tsfile_to_dataframe():