
added new tests, enhanced old ones
technocreep committed Nov 16, 2023
1 parent 7dde717 commit 0742766
Showing 7 changed files with 122 additions and 16 deletions.
fedot_ind/core/architecture/datasets/splitters.py (3 changes: 2 additions & 1 deletion)
@@ -1,7 +1,7 @@
 """
 This module contains functions for splitting a torch dataset into parts.
 """
-from typing import List, Tuple, Generator, Optional, Dict
+from typing import Dict, Generator, List, Optional, Tuple

 import numpy as np
 from torch.utils.data import Dataset, Subset
@@ -44,6 +44,7 @@ def k_fold(dataset: Dataset, n: int) -> Generator[Tuple[Subset, Subset], None, None]:
         train_ds = Subset(dataset, train_indices)
         yield train_ds, test_ds

+
 def split_data(dataset: Dataset, n: int, verbose: bool = False) -> List[np.ndarray]:
     """
     Splits the data into n parts, keeping the proportions of the classes.
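
For orientation, a minimal usage sketch of the two splitters touched here, assuming only the signatures visible in this hunk (split_data returns per-fold index arrays; k_fold yields (train, test) Subset pairs):

from torch.utils.data import Dataset

from fedot_ind.core.architecture.datasets.splitters import k_fold, split_data

def cross_validate(dataset: Dataset) -> None:
    # Class-proportional index arrays, one per fold.
    fold_indices = split_data(dataset, n=3, verbose=True)
    print([fold.size for fold in fold_indices])
    # Train/test Subset pairs that together cover the whole dataset.
    for train_ds, test_ds in k_fold(dataset, n=3):
        print(len(train_ds), len(test_ds))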
@@ -137,10 +137,12 @@ def _build_pipeline(self):
         for index, (basis, extractor) in enumerate(zip(self.branch_nodes, self.extractors)):
             pipeline_builder.add_node(basis, branch_idx=index)
             pipeline_builder.add_node(extractor, branch_idx=index)
-        pipeline_builder.join_branches('mlp', params={'hidden_layer_sizes': (256, 128, 64, 32),
-                                                      'max_iter': 300,
-                                                      'activation': 'relu',
-                                                      'solver': 'adam', })
+        pipeline_builder.join_branches('rf')
+
+        # pipeline_builder.join_branches('mlp', params={'hidden_layer_sizes': (256, 128, 64, 32),
+        #                                               'max_iter': 300,
+        #                                               'activation': 'relu',
+        #                                               'solver': 'adam', })

         return pipeline_builder.build()
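
The join above swaps the tuned MLP head for a plain random forest ('rf'). A sketch of the same branch-and-join pattern in isolation; the PipelineBuilder import path is FEDOT's, and the node names are illustrative:

from fedot.core.pipelines.pipeline_builder import PipelineBuilder

builder = PipelineBuilder()
# One (basis, extractor) chain per branch, mirroring _build_pipeline above.
for index, (basis, extractor) in enumerate([('eigen_basis', 'quantile_extractor')]):
    builder.add_node(basis, branch_idx=index)
    builder.add_node(extractor, branch_idx=index)
builder.join_branches('rf')
pipeline = builder.build()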
fedot_ind/tools/loader.py (10 changes: 3 additions & 7 deletions)
@@ -16,11 +16,12 @@


 class DataLoader:
-    """Class for reading data from ``tsv`` files and downloading from UCR archive if not found locally.
-    At the moment supports only ``.txt`` and ``.arff`` formats, but not relational ``.arff`` or ``.ts`` files.
+    """Class for reading data files and downloading from UCR archive if not found locally.
+    At the moment supports ``.ts``, ``.txt``, ``.tsv``, and ``.arff`` formats.
+
     Args:
         dataset_name: name of dataset
         folder: path to folder with data

     Examples:
         >>> data_loader = DataLoader('ItalyPowerDemand')
@@ -787,8 +788,3 @@ def extract_data(self, dataset_name: str, data_path: str):
             return (x_train, y_train), (x_test, y_test)
         else:
             return (pd.DataFrame(x_train), y_train), (pd.DataFrame(x_test), y_test)
-
-
-if __name__ == '__main__':
-    data_loader = DataLoader('AppliancesEnergy')
-    _train_data, _test_data = data_loader.load_data()
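
A short usage sketch of the loader after this change; the unpacking follows the removed __main__ block and the docstring's Examples section, while the printed shapes are only indicative:

from fedot_ind.tools.loader import DataLoader

# Falls back to downloading from the UCR archive when the dataset
# is not found in the given folder.
data_loader = DataLoader('ItalyPowerDemand')
(x_train, y_train), (x_test, y_test) = data_loader.load_data()
print(x_train.shape, len(y_train))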
tests/unit/core/architecture/datasets/test_splitters.py (27 changes: 25 additions & 2 deletions)
@@ -5,7 +5,7 @@
 from torchvision.datasets import ImageFolder
 from torchvision.transforms import ToTensor

-from fedot_ind.core.architecture.datasets.splitters import k_fold, split_data
+from fedot_ind.core.architecture.datasets.splitters import k_fold, split_data, undersampling, dataset_info, get_dataset_mean_std, train_test_split
 from fedot_ind.api.utils.path_lib import PROJECT_PATH

 DATASETS_PATH = os.path.abspath(PROJECT_PATH + '/tests/data/datasets')
@@ -18,8 +18,13 @@ def dataset():
     yield ImageFolder(root=path, transform=ToTensor())


+def test_train_test_split(dataset):
+    train_ds, test_ds = train_test_split(dataset, p=0.2)
+    assert len(train_ds) + len(test_ds) == len(dataset)
+
+
 def test_split_data(dataset):
-    fold_indices = split_data(dataset, n=3)
+    fold_indices = split_data(dataset, n=3, verbose=True)
     assert np.array_equal(np.sort(np.concatenate(fold_indices)), np.arange(len(dataset)))
     assert fold_indices[0].size == 21
     assert fold_indices[1].size == 20
@@ -29,3 +34,21 @@ def test_split_data(dataset):
 def test_k_fold(dataset):
     for train_ds, val_ds in k_fold(dataset, 3):
         assert len(train_ds) + len(val_ds) == len(dataset)
+
+
+def test_undersampling(dataset):
+    balanced = undersampling(dataset=dataset, n=3, verbose=True)
+    assert len(balanced) == 9
+
+
+def test_dataset_info(dataset):
+    result = dataset_info(dataset=dataset, verbose=True)
+    assert isinstance(result, dict)
+
+
+def test_get_dataset_mean_std(dataset):
+    mean, std = get_dataset_mean_std(dataset=dataset)
+    assert isinstance(mean, tuple)
+    assert isinstance(std, tuple)
+    assert len(mean) == 3
+    assert len(std) == 3
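
The last test expects one mean and one std per colour channel. A plausible reference implementation, written only to illustrate the contract the test encodes (the real helper in splitters.py may differ):

import numpy as np
from torch.utils.data import Dataset

def reference_mean_std(dataset: Dataset) -> tuple:
    # Stack all (C, H, W) image tensors into (N, C, H, W) — assumes equally
    # sized images — and reduce over everything except the channel axis.
    images = np.stack([image.numpy() for image, _ in dataset])
    mean = tuple(images.mean(axis=(0, 2, 3)))
    std = tuple(images.std(axis=(0, 2, 3)))
    return mean, std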
@@ -1,3 +1,45 @@
 from fedot_ind.core.architecture.experiment.TimeSeriesAnomalyDetection import TimeSeriesAnomalyDetectionPreset
 from fedot_ind.tools.synthetic.ts_generator import TimeSeriesGenerator

+import pytest
+
+
+@pytest.fixture()
+def time_series():
+    ts_config = {'ts_type': 'random_walk',
+                 'length': 1000,
+                 'start_val': 36.6}
+    ts = TimeSeriesGenerator(ts_config).get_ts()
+    return ts
+
+
+@pytest.fixture()
+def anomaly_dict():
+    anomaly_d = {'anomaly1': [[40, 50], [60, 80], [200, 220]],
+                 'anomaly2': [[300, 320], [400, 420], [600, 620]]}
+    return anomaly_d
+
+
+@pytest.fixture()
+def detector():
+    params = dict(branch_nodes=['eigen_basis'],
+                  dataset='test',
+                  tuning_iterations=1,
+                  tuning_timeout=1,
+                  model_params=dict(problem='classification',
+                                    timeout=0.5,
+                                    n_jobs=1,
+                                    logging_level=50))
+    detector = TimeSeriesAnomalyDetectionPreset(params)
+    return detector
+
+
+def test_fit_predict(detector, time_series, anomaly_dict):
+    try:
+        detector.fit(time_series, anomaly_dict)
+    except Exception:
+        detector.fit(time_series, anomaly_dict)  # retry once if the first fit attempt fails
+    labels = detector.predict(time_series)
+    proba = detector.predict_proba(time_series)
+    metrics = detector.get_metrics(time_series, metric_names=['f1', 'roc_auc'])
+    assert detector.auto_model.current_pipeline.is_fitted is True
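
For context on the anomaly_dict convention above: each key is a class label mapped to [start, stop] index ranges on the series. A small standalone helper, not part of the library, that expands such a dict into point-wise labels (the half-open intervals and the 'no_anomaly' filler are assumptions):

import numpy as np

def intervals_to_labels(anomaly_dict: dict, length: int) -> np.ndarray:
    # Every point starts unlabelled; later intervals overwrite earlier ones.
    labels = np.full(length, 'no_anomaly', dtype=object)
    for cls, intervals in anomaly_dict.items():
        for start, stop in intervals:
            labels[start:stop] = cls
    return labels

labels = intervals_to_labels({'anomaly1': [[40, 50]], 'anomaly2': [[300, 320]]}, length=1000)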
@@ -1,14 +1,18 @@
+import os
+
 import numpy as np
 import pytest

+from fedot_ind.api.utils.path_lib import PROJECT_PATH
 from fedot_ind.core.architecture.experiment.TimeSeriesRegression import TimeSeriesRegression
 from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor

+from fedot_ind.tools.loader import DataLoader

 @pytest.fixture
 def params():
     return dict(strategy='quantile',
                 model_params={'problem': 'regression',
-                              'timeout': 1,
+                              'timeout': 0.5,
                               'n_jobs': 2,
                               'metric': 'rmse'},
                 generator_class=QuantileExtractor({'window_mode': True, 'window_size': 20}),
@@ -23,6 +27,14 @@ def regressor(params):
     return TimeSeriesRegression(params)


+@pytest.fixture()
+def dataset():
+    path = os.path.join(PROJECT_PATH, 'examples/data/')
+    loader = DataLoader(dataset_name='BitcoinSentiment',
+                        folder=path)
+    return loader.load_data()
+
+
 def test_init(regressor):
     assert regressor.dataset_name == 'ApplianceEnergy'
     assert isinstance(regressor.generator_runner, QuantileExtractor)
@@ -31,3 +43,13 @@ def test_init(regressor):
     assert regressor.pca.n_components == 0.9
     assert regressor.pca.svd_solver == 'full'
     assert regressor.model_hyperparams['metric'] == 'rmse'
+
+
+def test_fit_predict(regressor, dataset):
+    (X_train, y_train), (X_test, y_test) = dataset
+    regressor.fit(X_train, y_train)
+    predict = regressor.predict(X_test, y_test)
+    metrics = regressor.get_metrics(target=y_test, metric_names=['rmse', 'mae', 'r2'])
+
+    assert isinstance(predict, np.ndarray)
+    assert isinstance(metrics, dict)
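
get_metrics(metric_names=['rmse', 'mae', 'r2']) is expected to return a dict keyed by metric name. An equivalent computation with scikit-learn, shown only to make that contract concrete (the library's own implementation may differ):

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def regression_metrics(target: np.ndarray, predict: np.ndarray) -> dict:
    # One entry per requested metric name, mirroring the test's expectations.
    return {'rmse': float(np.sqrt(mean_squared_error(target, predict))),
            'mae': float(mean_absolute_error(target, predict)),
            'r2': float(r2_score(target, predict))}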
tests/unit/tools/test_load_data.py (20 changes: 20 additions & 0 deletions)
@@ -50,6 +50,14 @@ def test__load_from_tsfile_to_dataframe():
     full_path = os.path.join(PROJECT_PATH, 'examples/data/BitcoinSentiment/BitcoinSentiment_TEST.ts')
     x, y = loader._load_from_tsfile_to_dataframe(full_file_path_and_name=full_path, return_separate_X_and_y=True)

+
+def test__load_from_tsfile_to_dataframe_with_timestamps():
+    ds_name = 'name'
+    path = '.'
+    loader = DataLoader(dataset_name=ds_name, folder=path)
+    full_path = os.path.join(PROJECT_PATH, 'examples/data/AppliancesEnergy/AppliancesEnergy_TEST.ts')
+    x, y = loader._load_from_tsfile_to_dataframe(full_file_path_and_name=full_path, return_separate_X_and_y=True)
+
     assert isinstance(x, pd.DataFrame)
     assert isinstance(y, np.ndarray)
     assert x.shape[0] == y.shape[0]
@@ -100,6 +108,18 @@ def test_read_arff_files():
         assert i is not None


+def test_read_tsv():
+    ds_name = 'name'
+    path = '.'
+    loader = DataLoader(dataset_name=ds_name, folder=path)
+    path = os.path.join(PROJECT_PATH, 'tests', 'data', 'datasets')
+    x_train, y_train, x_test, y_test = loader.read_tsv(dataset_name='ItalyPowerDemand_tsv',
+                                                       data_path=path)
+
+    for i in [x_train, y_train, x_test, y_test]:
+        assert i is not None
+
+
 def test_read_train_test_files():
     ds_name = 'name'
     path = '.'
