unit and integration test improvement + explainer module (#108)
* enhanced unit-test coverage
* improved integration tests
* added point/interval explainer with distance methods
technocreep authored Dec 21, 2023
1 parent 3351c3b commit de7df83
Showing 127 changed files with 4,619 additions and 769 deletions.
100 changes: 100 additions & 0 deletions examples/data/ItalyPowerDemand_fake/ItalyPowerDemand_fake_TEST.arff

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions examples/data/ItalyPowerDemand_fake/ItalyPowerDemand_fake_TEST.ts

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions examples/data/ItalyPowerDemand_fake/ItalyPowerDemand_fake_TEST.txt

Large diffs are not rendered by default.

100 changes: 100 additions & 0 deletions examples/data/ItalyPowerDemand_fake/ItalyPowerDemand_fake_TRAIN.arff

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions examples/data/ItalyPowerDemand_fake/ItalyPowerDemand_fake_TRAIN.ts

Large diffs are not rendered by default.

67 changes: 67 additions & 0 deletions examples/ensemble/kernel_ensemble_example.py
@@ -0,0 +1,67 @@
from fedot import Fedot

from fedot_ind.core.ensemble.kernel_ensemble import init_kernel_ensemble
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble
from fedot_ind.tools.loader import DataLoader

n_best = 3
feature_dict = {}
metric_list = []
proba_dict = {}
metric_dict = {}
dataset_name = 'Lightning2'
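# Two candidate configurations per feature-generator family: wavelet-based
# signal features (mexh/morl) and quantile features with and without windowing.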
kernel_list = {'wavelet': [
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "mexh",
'n_components': 2
}},
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "morl",
'n_components': 2
}}],
'quantile': [
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': True,
'window_size': 25
}
},
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': False,
'window_size': 40
}
}]
}
fg_names = []
for key in kernel_list:
    for model_params in kernel_list[key]:
        fg_names.append(f'{key}_{model_params}')

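# Load the raw dataset, then fit and score every generator from kernel_list;
# init_kernel_ensemble returns generator scores (set_of_fg) plus per-generator
# train/test feature matrices and the corresponding targets.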
train_data, test_data = DataLoader(dataset_name).load_data()
set_of_fg, train_feats, train_target, test_feats, test_target = init_kernel_ensemble(train_data,
test_data,
kernel_list=kernel_list)

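# Keep the n_best generators by score and pair their train/test features.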
n_best_generators = set_of_fg.T.nlargest(n_best, 0).index
for rank in range(n_best):
    fg_rank = n_best_generators[rank]
    train_best = train_feats[fg_rank]
    test_best = test_feats[fg_rank]
    # store (train, test) features; fit() below consumes feature[0] for training
    feature_dict.update({fg_names[rank]: (train_best, test_best)})

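# Train a FEDOT classifier on each selected feature set and collect
# class probabilities and metrics for rank ensembling.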
for model_name, feature in feature_dict.items():
    industrial = Fedot(metric='roc_auc', timeout=5, problem='classification', n_jobs=6)

    model = industrial.fit(feature[0], train_target)
    labels = industrial.predict(feature[1])
    proba_dict.update({model_name: industrial.predict_proba(feature[1])})
    metric_dict.update({model_name: industrial.get_metrics(test_target,
                                                           metric_names=['roc_auc', 'f1', 'accuracy'])})
rank_ensembler = RankEnsemble(dataset_name=dataset_name,
proba_dict={dataset_name: proba_dict},
metric_dict={dataset_name: metric_dict})

ensemble_result = rank_ensembler.ensemble()
1,477 changes: 1,477 additions & 0 deletions examples/explainability.ipynb

Large diffs are not rendered by default.
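
Since the notebook diff is not rendered, here is a minimal, hedged sketch of how the new point explainer might be driven through the API. FedotIndustrial, DataLoader, the 'ts_classification' task and 'fedot_preset' strategy all appear elsewhere in this commit; the explain() call, its 'method' argument, and the timeout kwarg are assumptions inferred from the commit message, not a confirmed API.

# Hypothetical sketch only: explain() and method='point' are assumed names,
# inferred from "added point/interval explainer with distance methods".
from fedot_ind.api.main import FedotIndustrial
from fedot_ind.tools.loader import DataLoader

train_data, test_data = DataLoader('Lightning2').load_data()

industrial = FedotIndustrial(task='ts_classification',
                             strategy='fedot_preset',
                             timeout=5)            # timeout kwarg is assumed
industrial.fit(train_data)                         # assumed call shape
labels = industrial.predict(test_data)             # assumed call shape
industrial.explain(method='point')                 # assumed explainer entry point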

22 changes: 7 additions & 15 deletions fedot_ind/api/main.py
@@ -8,7 +8,7 @@
from fedot.core.pipelines.pipeline import Pipeline

from fedot_ind.api.utils.configurator import Configurator
from fedot_ind.api.utils.path_lib import default_path_to_save_results
from fedot_ind.api.utils.path_lib import DEFAULT_PATH_RESULTS
from fedot_ind.core.architecture.experiment.computer_vision import CV_TASKS
from fedot_ind.core.architecture.settings.task_factory import TaskEnum
from fedot_ind.core.operation.transformation.splitter import TSTransformer
@@ -52,8 +52,8 @@ class FedotIndustrial(Fedot):
"""

    def __init__(self, **kwargs):
        kwargs.setdefault('output_folder', default_path_to_save_results())
        Path(kwargs.get('output_folder', default_path_to_save_results())).mkdir(parents=True, exist_ok=True)
        kwargs.setdefault('output_folder', DEFAULT_PATH_RESULTS)
        Path(kwargs.get('output_folder', DEFAULT_PATH_RESULTS)).mkdir(parents=True, exist_ok=True)
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s %(levelname)s: %(name)s - %(message)s',
@@ -63,20 +63,15 @@ def __init__(self, **kwargs):
            ]
        )
        super(Fedot, self).__init__()

        self.logger = logging.getLogger('FedotIndustrialAPI')

        # self.reporter = ReporterTSC()
        self.configurator = Configurator()

        self.config_dict = None

        self.__init_experiment_setup(**kwargs)
        self.solver = self.__init_solver()

    def __init_experiment_setup(self, **kwargs):
        self.logger.info('Initialising experiment setup')
        # self.reporter.path_to_save = kwargs.get('output_folder')
        if 'task' in kwargs.keys() and kwargs['task'] in CV_TASKS.keys():
            self.config_dict = kwargs
        else:
@@ -88,9 +83,6 @@ def __init_solver(self):
        if self.config_dict['task'] == 'ts_classification':
            if self.config_dict['strategy'] == 'fedot_preset':
                solver = TaskEnum[self.config_dict['task']].value['fedot_preset']
            # elif self.config_dict['strategy'] is None:
            #     self.config_dict['strategy'] = 'InceptionTime'
            #     solver = TaskEnum[self.config_dict['task']].value['nn']
            else:
                solver = TaskEnum[self.config_dict['task']].value['default']
        elif self.config_dict['task'] == 'ts_forecasting':
@@ -249,11 +241,11 @@ def split_ts(self, time_series: np.array,
                 strategy: str = 'frequent',
                 plot: bool = True) -> Tuple[np.array, np.array]:

        splitter = TSTransformer(time_series=time_series,
                                 anomaly_dict=anomaly_dict,
                                 strategy=strategy)
        splitter = TSTransformer(strategy=strategy)

        train_data, test_data = splitter.transform_for_fit(plot=plot,
        train_data, test_data = splitter.transform_for_fit(series=time_series,
                                                           anomaly_dict=anomaly_dict,
                                                           plot=plot,
                                                           binarize=binarize)

        return train_data, test_data
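
For reference, a minimal sketch of the reworked splitter flow under the new signature shown in this hunk: construction now takes only the strategy, while the series and anomaly markup move to transform_for_fit. The toy series and anomaly_dict values below are illustrative assumptions, not data from this commit.

# Sketch of the new TSTransformer call order; the anomaly markup format
# below is an assumption for illustration only.
import numpy as np

from fedot_ind.core.operation.transformation.splitter import TSTransformer

time_series = np.random.randn(500)
anomaly_dict = {'anomaly': [[60, 80], [200, 230]]}  # assumed markup format

splitter = TSTransformer(strategy='frequent')
train_data, test_data = splitter.transform_for_fit(series=time_series,
                                                   anomaly_dict=anomaly_dict,
                                                   plot=False,
                                                   binarize=True)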
22 changes: 9 additions & 13 deletions fedot_ind/api/utils/input_data.py
@@ -11,36 +11,32 @@ def init_input_data(X: pd.DataFrame, y: np.ndarray, task: str = 'classification'
    Args:
        X: pandas DataFrame with features
        y: numpy array with target values
        task: str, task type, 'classification' or 'regression'
    Returns:
        InputData object convenient for FEDOT framework
    Example:
        To produce input data object::
            rows, cols = 100, 50
            X = pd.DataFrame(np.random.random((rows, cols)))
            y = np.random.randint(0, 2, rows)
            input_data = init_input_data(X, y)
    """
    is_multivariate_data = isinstance(X.iloc[0, 0], pd.Series)
    if is_multivariate_data:
        input_data = InputData(idx=np.arange(len(X)),
                               features=np.array(X.values.tolist()),
                               target=y.reshape(-1, 1),
                               # task=Task(TaskTypesEnum.classification),
                               task=Task(TaskTypesEnum(task)),
                               data_type=DataTypesEnum.image)
    else:
        input_data = InputData(idx=np.arange(len(X)),
                               features=X.values,
                               target=np.ravel(y).reshape(-1, 1),
                               # task=Task(TaskTypesEnum.classification),
                               task=Task(TaskTypesEnum(task)),
                               data_type=DataTypesEnum.table)

    return input_data


if __name__ == '__main__':
    rows, cols = 100, 50
    X = pd.DataFrame(np.random.random((rows, cols)))
    y = np.random.randint(0, 2, rows)
    input_data = init_input_data(X, y)
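
The image branch above fires when every DataFrame cell holds a pd.Series (one channel of one sample); a quick sketch of such multivariate input, with arbitrary shapes:

# Multivariate input sketch: each cell is a pd.Series, so init_input_data
# takes the DataTypesEnum.image branch. Shapes here are arbitrary.
n_samples, n_channels, series_len = 20, 3, 50
X_multi = pd.DataFrame([[pd.Series(np.random.random(series_len))
                         for _ in range(n_channels)]
                        for _ in range(n_samples)])
y_multi = np.random.randint(0, 2, n_samples)
multi_input = init_input_data(X_multi, y_multi)  # features shape (20, 3, 50)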
8 changes: 2 additions & 6 deletions fedot_ind/api/utils/path_lib.py
@@ -7,10 +7,6 @@
PATH_TO_DEFAULT_PARAMS = os.path.join(PROJECT_PATH, 'fedot_ind/core/repository/data/default_operation_params.json')

# For results collection
DS_INFO_PATH = os.path.join(PROJECT_PATH, 'core', 'architecture', 'postprocessing', 'ucr_datasets.json')
DS_INFO_PATH = os.path.join(PROJECT_PATH, 'fedot_ind', 'core', 'architecture', 'postprocessing', 'ucr_datasets.json')


def default_path_to_save_results() -> str:
    path = PROJECT_PATH
    save_path = os.path.join(path, 'results_of_experiments')
    return save_path
DEFAULT_PATH_RESULTS = os.path.join(PROJECT_PATH, 'results_of_experiments')
37 changes: 0 additions & 37 deletions fedot_ind/api/utils/reporter.py

This file was deleted.

11 changes: 5 additions & 6 deletions fedot_ind/api/utils/saver_collections.py
@@ -3,7 +3,7 @@

import pandas as pd

from fedot_ind.api.utils.path_lib import default_path_to_save_results
from fedot_ind.api.utils.path_lib import DEFAULT_PATH_RESULTS


class ResultSaver:
@@ -15,12 +15,11 @@ def __init__(self, dataset_name: str, generator_name: str, output_dir: str = Non
        self.save_method_dict = {'labels': self.save_labels,
                                 'probs': self.save_probs,
                                 'metrics': self.save_metrics,
                                 'baseline_metrics': self.save_baseline_metrics
                                 }
                                 'baseline_metrics': self.save_baseline_metrics}

    def __init_save_path(self, dataset_name, generator_name, output_dir):
        if output_dir is None:
            self.output_dir = default_path_to_save_results()
            self.output_dir = DEFAULT_PATH_RESULTS
        else:
            self.output_dir = os.path.abspath(output_dir)
        path = os.path.join(self.output_dir, generator_name, dataset_name)
@@ -37,12 +36,12 @@ def save(self, predicted_data, prediction_type: str):

    def save_labels(self, label_data):
        df = pd.DataFrame(label_data, dtype=int)
        df.to_csv(os.path.join(self.path, 'predicted_labels.csv'))
        df.to_csv(os.path.join(self.path, 'labels.csv'))

    def save_probs(self, prob_data):
        df_preds = pd.DataFrame(prob_data.round(3), dtype=float)
        df_preds.columns = [f'Class_{x + 1}' for x in df_preds.columns]
        df_preds.to_csv(os.path.join(self.path, 'predicted_probs.csv'))
        df_preds.to_csv(os.path.join(self.path, 'probs.csv'))

    def save_metrics(self, metrics: dict):
        df = pd.DataFrame(metrics, index=[0])
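
As a usage note, a small sketch of ResultSaver based only on what this diff shows: the constructor arguments mirror __init_save_path, the prediction_type keys come from save_method_dict, and the output file names are the new labels.csv/probs.csv. The arrays below are illustrative.

import numpy as np

from fedot_ind.api.utils.saver_collections import ResultSaver

saver = ResultSaver(dataset_name='Lightning2',
                    generator_name='quantile',
                    output_dir=None)  # None falls back to DEFAULT_PATH_RESULTS

saver.save(np.array([0, 1, 1, 0]), prediction_type='labels')  # -> labels.csv
saver.save(np.random.rand(4, 2), prediction_type='probs')     # -> probs.csv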
3 changes: 2 additions & 1 deletion fedot_ind/core/architecture/datasets/splitters.py
@@ -1,7 +1,7 @@
"""
This module contains functions for splitting a torch dataset into parts.
"""
from typing import List, Tuple, Generator, Optional, Dict
from typing import Dict, Generator, List, Optional, Tuple

import numpy as np
from torch.utils.data import Dataset, Subset
@@ -44,6 +44,7 @@ def k_fold(dataset: Dataset, n: int) -> Generator[Tuple[Subset, Subset], None, N
        train_ds = Subset(dataset, train_indices)
        yield train_ds, test_ds


def split_data(dataset: Dataset, n: int, verbose: bool = False) -> List[np.ndarray]:
    """
    Splits the data into n parts, keeping the proportions of the classes.
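
A short sketch of the k_fold generator touched in this hunk: per its signature, it yields (train, test) Subset pairs over a torch Dataset. TensorDataset and the sizes below are illustrative stand-ins, not data from this commit.

import torch
from torch.utils.data import TensorDataset

from fedot_ind.core.architecture.datasets.splitters import k_fold

dataset = TensorDataset(torch.randn(100, 8), torch.randint(0, 2, (100,)))
for fold, (train_ds, test_ds) in enumerate(k_fold(dataset, n=5)):
    print(f'fold {fold}: train={len(train_ds)} samples, test={len(test_ds)} samples')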