diff --git a/fedot_ind/core/architecture/datasets/splitters.py b/fedot_ind/core/architecture/datasets/splitters.py index 81ef6481a..906712aac 100644 --- a/fedot_ind/core/architecture/datasets/splitters.py +++ b/fedot_ind/core/architecture/datasets/splitters.py @@ -1,7 +1,7 @@ """ This module contains functions for splitting a torch dataset into parts. """ -from typing import List, Tuple, Generator, Optional, Dict +from typing import Dict, Generator, List, Optional, Tuple import numpy as np from torch.utils.data import Dataset, Subset @@ -44,6 +44,7 @@ def k_fold(dataset: Dataset, n: int) -> Generator[Tuple[Subset, Subset], None, N train_ds = Subset(dataset, train_indices) yield train_ds, test_ds + def split_data(dataset: Dataset, n: int, verbose: bool = False) -> List[np.ndarray]: """ Splits the data into n parts, keeping the proportions of the classes. diff --git a/fedot_ind/core/architecture/experiment/TimeSeriesAnomalyDetection.py b/fedot_ind/core/architecture/experiment/TimeSeriesAnomalyDetection.py index a7181ea78..89c58aecc 100644 --- a/fedot_ind/core/architecture/experiment/TimeSeriesAnomalyDetection.py +++ b/fedot_ind/core/architecture/experiment/TimeSeriesAnomalyDetection.py @@ -137,10 +137,12 @@ def _build_pipeline(self): for index, (basis, extractor) in enumerate(zip(self.branch_nodes, self.extractors)): pipeline_builder.add_node(basis, branch_idx=index) pipeline_builder.add_node(extractor, branch_idx=index) - pipeline_builder.join_branches('mlp', params={'hidden_layer_sizes': (256, 128, 64, 32), - 'max_iter': 300, - 'activation': 'relu', - 'solver': 'adam', }) + pipeline_builder.join_branches('rf') + + # pipeline_builder.join_branches('mlp', params={'hidden_layer_sizes': (256, 128, 64, 32), + # 'max_iter': 300, + # 'activation': 'relu', + # 'solver': 'adam', }) return pipeline_builder.build() diff --git a/fedot_ind/tools/loader.py b/fedot_ind/tools/loader.py index 2088a2b93..85c107ba1 100644 --- a/fedot_ind/tools/loader.py +++ b/fedot_ind/tools/loader.py @@ -16,11 +16,12 @@ class DataLoader: - """Class for reading data from ``tsv`` files and downloading from UCR archive if not found locally. - At the moment supports only ``.txt`` and ``.arff`` formats, but not relational ``.arff`` or ``.ts`` files. + """Class for reading data files and downloading from UCR archive if not found locally. + At the moment supports ``.ts``, ``.txt``, ``.tsv``, and ``.arff`` formats. Args: dataset_name: name of dataset + folder: path to folder with data Examples: >>> data_loader = DataLoader('ItalyPowerDemand') @@ -787,8 +788,3 @@ def extract_data(self, dataset_name: str, data_path: str): return (x_train, y_train), (x_test, y_test) else: return (pd.DataFrame(x_train), y_train), (pd.DataFrame(x_test), y_test) - - -if __name__ == '__main__': - data_loader = DataLoader('AppliancesEnergy') - _train_data, _test_data = data_loader.load_data() diff --git a/tests/unit/core/architecture/datasets/test_splitters.py b/tests/unit/core/architecture/datasets/test_splitters.py index 6757a6a6c..c2f23f02c 100644 --- a/tests/unit/core/architecture/datasets/test_splitters.py +++ b/tests/unit/core/architecture/datasets/test_splitters.py @@ -5,7 +5,7 @@ from torchvision.datasets import ImageFolder from torchvision.transforms import ToTensor -from fedot_ind.core.architecture.datasets.splitters import k_fold, split_data +from fedot_ind.core.architecture.datasets.splitters import k_fold, split_data, undersampling, dataset_info, get_dataset_mean_std, train_test_split from fedot_ind.api.utils.path_lib import PROJECT_PATH DATASETS_PATH = os.path.abspath(PROJECT_PATH + '/tests/data/datasets') @@ -18,8 +18,13 @@ def dataset(): yield ImageFolder(root=path, transform=ToTensor()) +def test_train_test_split(dataset): + train_ds, test_ds = train_test_split(dataset, p=0.2) + assert len(train_ds) + len(test_ds) == len(dataset) + + def test_split_data(dataset): - fold_indices = split_data(dataset, n=3) + fold_indices = split_data(dataset, n=3, verbose=True) assert np.array_equal(np.sort(np.concatenate(fold_indices)), np.arange(len(dataset))) assert fold_indices[0].size == 21 assert fold_indices[1].size == 20 @@ -29,3 +34,21 @@ def test_split_data(dataset): def test_k_fold(dataset): for train_ds, val_ds in k_fold(dataset, 3): assert len(train_ds) + len(val_ds) == len(dataset) + + +def test_undersampling(dataset): + balanced = undersampling(dataset=dataset, n=3, verbose=True) + assert len(balanced) == 9 + + +def test_dataset_info(dataset): + result = dataset_info(dataset=dataset, verbose=True) + assert isinstance(result, dict) + + +def test_get_dataset_mean_std(dataset): + mean, std = get_dataset_mean_std(dataset=dataset) + assert isinstance(mean, tuple) + assert isinstance(std, tuple) + assert len(mean) == 3 + assert len(std) == 3 \ No newline at end of file diff --git a/tests/unit/core/architecture/experiment/test_TimeSeriesAnomalyDetection.py b/tests/unit/core/architecture/experiment/test_TimeSeriesAnomalyDetection.py index fbe68ae04..9ced5e4f6 100644 --- a/tests/unit/core/architecture/experiment/test_TimeSeriesAnomalyDetection.py +++ b/tests/unit/core/architecture/experiment/test_TimeSeriesAnomalyDetection.py @@ -1,3 +1,45 @@ from fedot_ind.core.architecture.experiment.TimeSeriesAnomalyDetection import TimeSeriesAnomalyDetectionPreset +from fedot_ind.tools.synthetic.ts_generator import TimeSeriesGenerator +import pytest + +@pytest.fixture() +def time_series(): + ts_config = {'ts_type': 'random_walk', + 'length': 1000, + 'start_val': 36.6} + ts = TimeSeriesGenerator(ts_config).get_ts() + return ts + + +@pytest.fixture() +def anomaly_dict(): + anomaly_d = {'anomaly1': [[40, 50], [60, 80], [200, 220]], + 'anomaly2': [[300, 320], [400, 420], [600, 620]]} + return anomaly_d + + +@pytest.fixture() +def detector(): + params = dict(branch_nodes=['eigen_basis'], + dataset='test', + tuning_iterations=1, + tuning_timeout=1, + model_params=dict(problem='classification', + timeout=0.5, + n_jobs=1, + logging_level=50)) + detector = TimeSeriesAnomalyDetectionPreset(params) + return detector + + +def test_fit_predict(detector, time_series, anomaly_dict): + try: + detector.fit(time_series, anomaly_dict) + except Exception as ex: + detector.fit(time_series, anomaly_dict) + labels = detector.predict(time_series) + proba = detector.predict_proba(time_series) + metrics = detector.get_metrics(time_series, metric_names=['f1', 'roc_auc']) + assert detector.auto_model.current_pipeline.is_fitted is True diff --git a/tests/unit/core/architecture/experiment/test_TimeSeriesRegression.py b/tests/unit/core/architecture/experiment/test_TimeSeriesRegression.py index 39ebfd71d..caa182d05 100644 --- a/tests/unit/core/architecture/experiment/test_TimeSeriesRegression.py +++ b/tests/unit/core/architecture/experiment/test_TimeSeriesRegression.py @@ -1,14 +1,18 @@ +import os + +import numpy as np import pytest +from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.core.architecture.experiment.TimeSeriesRegression import TimeSeriesRegression from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor - +from fedot_ind.tools.loader import DataLoader @pytest.fixture def params(): return dict(strategy='quantile', model_params={'problem': 'regression', - 'timeout': 1, + 'timeout': 0.5, 'n_jobs': 2, 'metric': 'rmse'}, generator_class=QuantileExtractor({'window_mode': True, 'window_size': 20}), @@ -23,6 +27,14 @@ def regressor(params): return TimeSeriesRegression(params) +@pytest.fixture() +def dataset(): + path = os.path.join(PROJECT_PATH, 'examples/data/') + loader = DataLoader(dataset_name='BitcoinSentiment', + folder=path) + return loader.load_data() + + def test_init(regressor): assert regressor.dataset_name == 'ApplianceEnergy' assert isinstance(regressor.generator_runner, QuantileExtractor) @@ -31,3 +43,13 @@ def test_init(regressor): assert regressor.pca.n_components == 0.9 assert regressor.pca.svd_solver == 'full' assert regressor.model_hyperparams['metric'] == 'rmse' + + +def test_fit_predict(regressor, dataset): + (X_train, y_train), (X_test, y_test) = dataset + regressor.fit(X_train, y_train) + predict = regressor.predict(X_test, y_test) + metrics = regressor.get_metrics(target=y_test, metric_names=['rmse', 'mae', 'r2']) + + assert isinstance(predict, np.ndarray) + assert isinstance(metrics, dict) diff --git a/tests/unit/tools/test_load_data.py b/tests/unit/tools/test_load_data.py index 15ebf8af9..5f597e2ef 100644 --- a/tests/unit/tools/test_load_data.py +++ b/tests/unit/tools/test_load_data.py @@ -50,6 +50,14 @@ def test__load_from_tsfile_to_dataframe(): full_path = os.path.join(PROJECT_PATH, 'examples/data/BitcoinSentiment/BitcoinSentiment_TEST.ts') x, y = loader._load_from_tsfile_to_dataframe(full_file_path_and_name=full_path, return_separate_X_and_y=True) + +def test__load_from_tsfile_to_dataframe_with_timestamps(): + ds_name = 'name' + path = '.' + loader = DataLoader(dataset_name=ds_name, folder=path) + full_path = os.path.join(PROJECT_PATH, 'examples/data/AppliancesEnergy/AppliancesEnergy_TEST.ts') + x, y = loader._load_from_tsfile_to_dataframe(full_file_path_and_name=full_path, return_separate_X_and_y=True) + assert isinstance(x, pd.DataFrame) assert isinstance(y, np.ndarray) assert x.shape[0] == y.shape[0] @@ -100,6 +108,18 @@ def test_read_arff_files(): assert i is not None +def test_read_tsv(): + ds_name = 'name' + path = '.' + loader = DataLoader(dataset_name=ds_name, folder=path) + path = os.path.join(PROJECT_PATH, 'tests', 'data', 'datasets') + x_train, y_train, x_test, y_test = loader.read_tsv(dataset_name='ItalyPowerDemand_tsv', + data_path=path) + + for i in [x_train, y_train, x_test, y_test]: + assert i is not None + + def test_read_train_test_files(): ds_name = 'name' path = '.'