Merge remote-tracking branch 'origin/release_0.4' into release_0.4
# Conflicts:
#	cases/utils.py
#	fedot_ind/core/architecture/abstraction/decorators.py
v1docq committed Jan 18, 2024
2 parents a14a74d + 8280427 commit 2ee1280
Showing 145 changed files with 2,293 additions and 1,431 deletions.
1 change: 0 additions & 1 deletion benchmark/abstract_bench.py
@@ -1,4 +1,3 @@
import gc
import logging
import os

4 changes: 2 additions & 2 deletions benchmark/benchmark_TSC.py
@@ -4,12 +4,13 @@
from abc import ABC
from copy import deepcopy

from fedot_ind.core.architecture.settings.computational import backend_methods as np
import pandas as pd
from aeon.benchmarking.results_loaders import *

from benchmark.abstract_bench import AbstractBenchmark
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot_ind.core.metrics.metrics_implementation import Accuracy
from fedot_ind.core.repository.constanst_repository import MULTI_CLF_BENCH, UNI_CLF_BENCH

@@ -25,7 +26,6 @@ def __init__(self,

self.logger = logging.getLogger(self.__class__.__name__)

# self._create_output_dir()
self.experiment_setup = experiment_setup
self.multi_TSC = MULTI_CLF_BENCH
self.uni_TSC = UNI_CLF_BENCH
4 changes: 0 additions & 4 deletions benchmark/benchmark_TSER.py
@@ -31,7 +31,6 @@ def __init__(self,

self.logger = logging.getLogger(self.__class__.__name__)

# self._create_output_dir()
self.experiment_setup = experiment_setup
self.monash_regression = MULTI_REG_BENCH
if custom_datasets is None:
@@ -104,6 +103,3 @@ def show_composite_pipeline(self):
pr = PipelineNode('ridge', nodes_from=[p.root_node for p in batch_pipelines])
composed_pipeline = Pipeline(pr)
composed_pipeline.show()

_ = 1
return
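The hunk above drops a leftover `_ = 1` / `return` tail from `show_composite_pipeline`, which joins several fitted branch pipelines under a single `ridge` head node. Below is a minimal sketch of that composition pattern using FEDOT's pipeline primitives; the two branch pipelines are illustrative placeholders, not the benchmark's actual ones.

```python
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

# Illustrative branch pipelines standing in for the benchmark's batch_pipelines.
batch_pipelines = [
    PipelineBuilder().add_node('scaling').add_node('rf').build(),
    PipelineBuilder().add_node('pca').add_node('logit').build(),
]

# Attach every branch's root node as an input of a single 'ridge' head,
# as show_composite_pipeline does, then render the resulting DAG.
head = PipelineNode('ridge', nodes_from=[p.root_node for p in batch_pipelines])
composed_pipeline = Pipeline(head)
composed_pipeline.show()
```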
50 changes: 4 additions & 46 deletions examples/example_utils.py
@@ -1,19 +1,18 @@
import random
from pathlib import Path

from fedot_ind.core.architecture.settings.computational import backend_methods as np
import pandas as pd
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, \
mean_squared_error, d2_absolute_error_score, \
median_absolute_error, r2_score
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.preprocessing import LabelEncoder

from fedot_ind.api.utils.path_lib import PROJECT_PATH
from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, \
mean_squared_error, d2_absolute_error_score, \
median_absolute_error, r2_score, mean_squared_log_error
from fedot_ind.core.architecture.settings.computational import backend_methods as np

ts_datasets = {
'm4_yearly': Path(PROJECT_PATH, 'examples', 'data', 'ts', 'M4YearlyTest.csv'),
@@ -23,13 +22,6 @@
'm4_quarterly': Path(PROJECT_PATH, 'examples', 'data', 'ts', 'M4QuarterlyTest.csv')}


def check_multivariate_data(data: pd.DataFrame) -> bool:
if isinstance(data.iloc[0, 0], pd.Series):
return True
else:
return False


def evaluate_metric(target, prediction):
try:
if len(np.unique(target)) > 2:
@@ -41,40 +33,6 @@ def evaluate_metric(target, prediction):
return metric


def init_input_data(X: pd.DataFrame, y: np.ndarray, task: str = 'classification') -> InputData:
is_multivariate_data = check_multivariate_data(X)
task_dict = {'classification': Task(TaskTypesEnum.classification),
'regression': Task(TaskTypesEnum.regression)}
features = X.values

if type((y)[0]) is np.str_ and task == 'classification':
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
elif type((y)[0]) is np.str_ and task == 'regression':
y = y.astype(float)

if is_multivariate_data:
input_data = InputData(idx=np.arange(len(X)),
features=np.array(features.tolist()).astype(np.float),
target=y.reshape(-1, 1),
task=task_dict[task],
data_type=DataTypesEnum.image)
else:
input_data = InputData(idx=np.arange(len(X)),
features=X.values,
target=np.ravel(y).reshape(-1, 1),
task=task_dict[task],
data_type=DataTypesEnum.table)

if task == 'regression':
input_data.target = input_data.target.squeeze()
elif task == 'classification':
input_data.target[input_data.target == -1] = 0
input_data.features = np.where(np.isnan(input_data.features), 0, input_data.features)
input_data.features = np.where(np.isinf(input_data.features), 0, input_data.features)
return input_data


def get_ts_data(dataset='m4_monthly', horizon: int = 30, m4_id=None):
time_series = pd.read_csv(ts_datasets[dataset])

@@ -1,7 +1,8 @@
import pandas as pd
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from examples.example_utils import evaluate_metric, init_input_data
from examples.example_utils import evaluate_metric
from fedot_ind.api.utils.data import init_input_data
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.loader import DataLoader

@@ -2,7 +2,8 @@
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from examples.example_utils import evaluate_metric, init_input_data
from examples.example_utils import evaluate_metric
from fedot_ind.api.utils.data import init_input_data
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.loader import DataLoader
from fedot.core.pipelines.pipeline import Pipeline
@@ -1,7 +1,7 @@

from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from examples.example_utils import evaluate_metric
from examples.example_utils import init_input_data
from fedot_ind.api.utils.data import init_input_data
from fedot_ind.tools.loader import DataLoader
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels

@@ -1,6 +1,7 @@
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from examples.example_utils import init_input_data, calculate_regression_metric
from examples.example_utils import calculate_regression_metric
from fedot_ind.api.utils.data import init_input_data
from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.loader import DataLoader
31 changes: 29 additions & 2 deletions fedot_ind/api/main.py
@@ -1,20 +1,23 @@
import logging
from pathlib import Path

from fedot.api.main import Fedot
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.metrics_repository import ClassificationMetricsEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot_ind.api.utils.checkers_collections import DataCheck
from fedot_ind.api.utils.path_lib import DEFAULT_PATH_RESULTS as default_path_to_save_results
from fedot_ind.core.architecture.abstraction.decorators import DaskServer
from fedot_ind.core.architecture.settings.computational import BackendMethods
from fedot_ind.core.ensemble.random_automl_forest import RAFensembler
from fedot_ind.core.operation.transformation.splitter import TSTransformer
from fedot_ind.core.repository.constanst_repository import FEDOT_WORKER_NUM, BATCH_SIZE_FOR_FEDOT_WORKER, \
from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_WORKER_NUM, \
FEDOT_WORKER_TIMEOUT_PARTITION
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.explain.explain import PointExplainer
from fedot_ind.tools.synthetic.anomaly_generator import AnomalyGenerator
from fedot_ind.tools.synthetic.ts_generator import TimeSeriesGenerator

@@ -324,7 +327,31 @@ def plot_operation_distribution(self, mode: str = 'total'):
show_fitness=True, dpi=100)

def explain(self, **kwargs):
raise NotImplementedError()
""" Explain model's prediction via time series points perturbation
Args:
samples: int, ``default=1``. Number of samples to explain.
window: int, ``default=5``. Window size for perturbation.
metric: str ``default='rmse'``. Distance metric for perturbation impact assessment.
threshold: int, ``default=90``. Threshold for perturbation impact assessment.
name: str, ``default='test'``. Name of the dataset to be placed on plot.
"""
methods = {'point': PointExplainer,
'shap': NotImplementedError,
'lime': NotImplementedError}

explainer = methods[kwargs.get('method', 'point')](model=self.solver,
features=self.predict_data.features,
target=self.predict_data.target)
metric = kwargs.get('metric', 'rmse')
window = kwargs.get('window', 5)
samples = kwargs.get('samples', 1)
threshold = kwargs.get('threshold', 90)
name = kwargs.get('name', 'test')

explainer.explain(n_samples=samples, window=window, method=metric)
explainer.visual(threshold=threshold, name=name)

def generate_ts(self, ts_config: dict):
"""
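The new `explain` body above replaces the old `NotImplementedError` stub: it dispatches on a `method` kwarg ('point' is backed by `PointExplainer`, while 'shap' and 'lime' remain unimplemented placeholders) and reads the remaining settings from kwargs with the defaults documented in the docstring. A hedged usage sketch, assuming `industrial` is an already fitted model from `fedot_ind.api.main` on which `predict` has been called so that `predict_data` is populated:

```python
# Usage sketch for the explain() signature introduced above; all values are
# illustrative and match the documented defaults where not overridden.
industrial.explain(method='point',     # only 'point' is wired up; 'shap'/'lime' are placeholders
                   samples=3,          # number of series to explain
                   window=5,           # perturbation window size
                   metric='rmse',      # distance metric for impact assessment
                   threshold=90,       # threshold for the impact visualisation
                   name='Lightning7')  # dataset name placed on the plot (illustrative)
```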
17 changes: 11 additions & 6 deletions fedot_ind/api/utils/checkers_collections.py
@@ -1,12 +1,12 @@
import logging

from fedot_ind.api.utils.data import check_multivariate_data
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot.core.data.data import InputData
from fedot.core.repository.dataset_types import DataTypesEnum
from sklearn.preprocessing import LabelEncoder
from fedot.core.repository.tasks import Task, TaskTypesEnum

from examples.example_utils import check_multivariate_data
from fedot_ind.core.architecture.preprocessing.data_convertor import NumpyConverter


@@ -28,7 +28,8 @@ def _init_input_data(self):

if is_multivariate_data:
self.input_data = InputData(idx=np.arange(len(X)),
features=np.array(X.values.tolist()).astype(np.float),
features=np.array(
X.values.tolist()).astype(np.float),
target=y.reshape(-1, 1),
task=self.task_dict[self.task],
data_type=DataTypesEnum.image)
@@ -42,14 +43,18 @@ def _init_input_data(self):
return

def _check_input_data_features(self):
self.input_data.features = np.where(np.isnan(self.input_data.features), 0, self.input_data.features)
self.input_data.features = np.where(np.isinf(self.input_data.features), 0, self.input_data.features)
self.input_data.features = NumpyConverter(data=self.input_data.features).convert_to_torch_format()
self.input_data.features = np.where(
np.isnan(self.input_data.features), 0, self.input_data.features)
self.input_data.features = np.where(
np.isinf(self.input_data.features), 0, self.input_data.features)
self.input_data.features = NumpyConverter(
data=self.input_data.features).convert_to_torch_format()

def _check_input_data_target(self):
if type(self.input_data.target[0][0]) is np.str_ and self.task == 'classification':
label_encoder = LabelEncoder()
self.input_data.target = label_encoder.fit_transform(self.input_data.target)
self.input_data.target = label_encoder.fit_transform(
self.input_data.target)
elif type(self.input_data.target[0][0]) is np.str_ and self.task == 'regression':
self.input_data.target = self.input_data.target.astype(float)

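The wrapped lines in `_check_input_data_features` above only reflow an existing sanitization step: NaN and infinite feature values are zeroed before the array is reshaped by `NumpyConverter`. A standalone illustration of that step on a toy array (the converter call is omitted here):

```python
import numpy as np

# Toy feature matrix containing the problem values the check removes.
features = np.array([[1.0, np.nan, 3.0],
                     [np.inf, 5.0, -np.inf]])

features = np.where(np.isnan(features), 0, features)  # NaN    -> 0
features = np.where(np.isinf(features), 0, features)  # +/-Inf -> 0
print(features)  # [[1. 0. 3.] [0. 5. 0.]]
```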
12 changes: 8 additions & 4 deletions fedot_ind/api/utils/configurator.py
@@ -98,11 +98,13 @@ def _get_generator_class(self) -> Union[BaseExtractor, None]:
else:
if generator.startswith('ensemble'):
dict_of_generators = {}
generators_to_ensemble = generator.transform_for_fit(': ')[1].transform_for_fit(' ')
generators_to_ensemble = generator.transform_for_fit(
': ')[1].transform_for_fit(' ')
for gen in generators_to_ensemble:
single_gen_class = self._extract_generator_class(gen)
dict_of_generators[gen] = single_gen_class
ensemble_gen_class = FeatureGenerator['ensemble'].value(list_of_generators=dict_of_generators)
ensemble_gen_class = FeatureGenerator['ensemble'].value(
list_of_generators=dict_of_generators)
self.feature_generator = 'ensemble'
return ensemble_gen_class

@@ -117,15 +119,17 @@ def _extract_generator_class(self, generator):
feature_gen_params = _feature_gen_params[f'{generator}_extractor']

for param in feature_gen_params:
feature_gen_params[param] = self.experiment_dict.get(param, feature_gen_params[param])
feature_gen_params[param] = self.experiment_dict.get(
param, feature_gen_params[param])

feature_gen_class = feature_gen_model(feature_gen_params)
return feature_gen_class

def __report_experiment_setup(self, experiment_dict):
"""Prints the experiment setup."""

top_info = ['task', 'dataset', 'strategy', 'branch_nodes', 'use_cache', 'n_jobs', 'timeout']
top_info = ['task', 'dataset', 'strategy',
'branch_nodes', 'use_cache', 'n_jobs', 'timeout']
label, data = [], []

for obj in top_info:
47 changes: 47 additions & 0 deletions fedot_ind/api/utils/data.py
@@ -0,0 +1,47 @@
import numpy as np
import pandas as pd
from fedot.core.data.data import InputData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import TaskTypesEnum, Task
from sklearn.preprocessing import LabelEncoder


def check_multivariate_data(data: pd.DataFrame) -> bool:
if isinstance(data.iloc[0, 0], pd.Series):
return True
else:
return False


def init_input_data(X: pd.DataFrame, y: np.ndarray, task: str = 'classification') -> InputData:
is_multivariate_data = check_multivariate_data(X)
task_dict = {'classification': Task(TaskTypesEnum.classification),
'regression': Task(TaskTypesEnum.regression)}
features = X.values

if type((y)[0]) is np.str_ and task == 'classification':
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
elif type((y)[0]) is np.str_ and task == 'regression':
y = y.astype(float)

if is_multivariate_data:
input_data = InputData(idx=np.arange(len(X)),
features=np.array(features.tolist()).astype(np.float),
target=y.reshape(-1, 1),
task=task_dict[task],
data_type=DataTypesEnum.image)
else:
input_data = InputData(idx=np.arange(len(X)),
features=X.values,
target=np.ravel(y).reshape(-1, 1),
task=task_dict[task],
data_type=DataTypesEnum.table)

if task == 'regression':
input_data.target = input_data.target.squeeze()
elif task == 'classification':
input_data.target[input_data.target == -1] = 0
input_data.features = np.where(np.isnan(input_data.features), 0, input_data.features)
input_data.features = np.where(np.isinf(input_data.features), 0, input_data.features)
return input_data
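The new module gathers `check_multivariate_data` and `init_input_data`, which previously lived in examples/example_utils.py, so that example scripts and the API share one implementation. A minimal usage sketch on a toy univariate classification table follows; note that the multivariate branch still uses the `np.float` alias, which recent NumPy releases have removed, so the sketch sticks to the tabular path.

```python
import numpy as np
import pandas as pd

from fedot_ind.api.utils.data import init_input_data

# Toy table: 4 samples, 3 numeric features, string class labels.
X = pd.DataFrame(np.random.rand(4, 3), columns=['f1', 'f2', 'f3'])
y = np.array(['up', 'down', 'up', 'down'])

train_input = init_input_data(X, y, task='classification')
print(train_input.data_type)       # DataTypesEnum.table for non-multivariate input
print(train_input.target.ravel())  # string labels encoded to integers by LabelEncoder
```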
9 changes: 6 additions & 3 deletions fedot_ind/api/utils/metafeatures.py
@@ -10,9 +10,11 @@ def __init__(self, train_data, test_data, dataset_name):
self.test_data = test_data
self.dataset_name = dataset_name
self.logger = logging.getLogger(self.__class__.__name__)
self.logger.info(f'Initializing MetaFeaturesDetector for {dataset_name}')
self.logger.info(
f'Initializing MetaFeaturesDetector for {dataset_name}')

self.base_metafeatures = ['test_size', 'train_size', 'length', 'number_of_classes', 'type']
self.base_metafeatures = [
'test_size', 'train_size', 'length', 'number_of_classes', 'type']
self.extra_metafeatures = []

def get_base_metafeatures(self):
@@ -37,7 +39,8 @@ def get_extra_metafeatures(self):
pass

def run(self):
self.logger.info(f'Running MetaFeaturesDetector for {self.dataset_name}')
self.logger.info(
f'Running MetaFeaturesDetector for {self.dataset_name}')
base_metafeatures = self.get_base_metafeatures()
return {**base_metafeatures}
# extra_metafeatures = self.get_extra_metafeatures()
6 changes: 4 additions & 2 deletions fedot_ind/api/utils/path_lib.py
@@ -4,9 +4,11 @@
PROJECT_PATH = str(Path(__file__).parent.parent.parent.parent)

# Default parameters of feature generators
PATH_TO_DEFAULT_PARAMS = os.path.join(PROJECT_PATH, 'fedot_ind/core/repository/data/default_operation_params.json')
PATH_TO_DEFAULT_PARAMS = os.path.join(
PROJECT_PATH, 'fedot_ind/core/repository/data/default_operation_params.json')

# For results collection
DS_INFO_PATH = os.path.join(PROJECT_PATH, 'fedot_ind', 'core', 'architecture', 'postprocessing', 'ucr_datasets.json')
DS_INFO_PATH = os.path.join(PROJECT_PATH, 'fedot_ind', 'core',
'architecture', 'postprocessing', 'ucr_datasets.json')

DEFAULT_PATH_RESULTS = os.path.join(PROJECT_PATH, 'results_of_experiments')