Skip to content

Commit

Permalink
Refactor API methods, update model and constant repositories, minor fixes
Browse files · Browse the repository at this point in the history
  • Loading branch information
v1docq committed Jan 22, 2024
1 parent 31b0e9f commit fc3dce0
Show file tree
Hide file tree
Showing 10 changed files with 1,437 additions and 1,077 deletions.
41 changes: 0 additions & 41 deletions examples/example_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, \
mean_squared_error, d2_absolute_error_score, \
median_absolute_error, r2_score
from sklearn.metrics import f1_score, roc_auc_score

from fedot_ind.api.utils.path_lib import PROJECT_PATH
from fedot_ind.core.architecture.settings.computational import backend_methods as np

Expand Down Expand Up @@ -60,40 +56,3 @@ def get_ts_data(dataset='m4_monthly', horizon: int = 30, m4_id=None):
train_data, test_data = train_test_data_setup(train_input)
return train_data, test_data, label


def calculate_regression_metric(test_target, labels):
test_target = test_target.astype(np.float)
metric_dict = {'r2_score:': r2_score(test_target, labels),
'mean_squared_error:': mean_squared_error(test_target, labels),
'root_mean_squared_error:': np.sqrt(mean_squared_error(test_target, labels)),
'mean_absolute_error': mean_absolute_error(test_target, labels),
'median_absolute_error': median_absolute_error(test_target, labels),
'explained_variance_score': explained_variance_score(test_target, labels),
'max_error': max_error(test_target, labels),
'd2_absolute_error_score': d2_absolute_error_score(test_target, labels)
# 'root_mean_squared_log_error': mean_squared_log_error(test_target, labels, squared=False)
}
df = pd.DataFrame.from_dict(metric_dict, orient='index')
return df

# def visualise_and_save():
# for class_number in np.unique(train_data[1]):
# for basis_name, basis in zip(['basis_before_power_iterations', 'basis_after_power_iterations'],
# [basis_1d_raw, basis_1d_approx]):
# class_idx = np.where(train_data[1] == class_number)[0]
# class_slice = np.take(basis, class_idx, 0)
# pd.DataFrame(np.median(class_slice, axis=0)).T.plot()
# # plt.show()
# plt.savefig(f'{dataset_name}/{basis_name}_{class_number}_median_component.png', bbox_inches='tight')
# # plt.title(f'mean_{basis_name}_components_for_{class_number}_class')
# rank_distrib = pd.DataFrame([rank_distribution_befor, rank_distribution_after]).T
# rank_distrib.columns = ['HT_approach',
# 'Proposed_approach']
# rank_distrib.plot(kind='kde')
# # plt.show()
# rank_dispersion_ht = np.round(rank_distrib['HT_approach'].std(), 3)
# rank_dispersion_new = np.round(rank_distrib['Proposed_approach'].std(), 3)
# plt.savefig(f'{dataset_name}/rank_distrib. '
# f'Classical_rank_{low_rank_befor}_std_{rank_dispersion_ht}.'
# f'New_{low_rank_after}_std_{rank_dispersion_new}.png', bbox_inches='tight')
# rank_distrib['classes'] = train_data[1]
2,291 changes: 1,326 additions & 965 deletions examples/explainability.ipynb

Large diffs are not rendered by default.

109 changes: 52 additions & 57 deletions fedot_ind/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.metrics_repository import ClassificationMetricsEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot_ind.api.utils.checkers_collections import DataCheck
Expand All @@ -15,11 +14,15 @@
from fedot_ind.core.ensemble.random_automl_forest import RAFensembler
from fedot_ind.core.operation.transformation.splitter import TSTransformer
from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_WORKER_NUM, \
FEDOT_WORKER_TIMEOUT_PARTITION
FEDOT_WORKER_TIMEOUT_PARTITION, FEDOT_GET_METRICS, FEDOT_TUNING_METRICS, FEDOT_HEAD_ENSEMBLE, \
FEDOT_ATOMIZE_OPERATION
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.tools.explain.explain import PointExplainer
from fedot_ind.tools.synthetic.anomaly_generator import AnomalyGenerator
from fedot_ind.tools.synthetic.ts_generator import TimeSeriesGenerator
import warnings

warnings.filterwarnings("ignore")


class FedotIndustrial(Fedot):
Expand Down Expand Up @@ -59,11 +62,12 @@ class FedotIndustrial(Fedot):

def __init__(self, **kwargs):
self.output_folder = kwargs.get('output_folder')
self.preprocessing = kwargs.get('industrial_preprocessing', False)
self.backend_method = kwargs.get('backend', 'cpu')
if self.output_folder is None:
kwargs.setdefault('output_folder', default_path_to_save_results())
Path(kwargs.get('output_folder', default_path_to_save_results())).mkdir(parents=True, exist_ok=True)
self.output_folder = default_path_to_save_results
Path(self.output_folder).mkdir(parents=True, exist_ok=True)
else:

Path(self.output_folder).mkdir(parents=True, exist_ok=True)
del kwargs['output_folder']

Expand All @@ -77,51 +81,50 @@ def __init__(self, **kwargs):
)

super(Fedot, self).__init__()

self.logger = logging.getLogger('FedotIndustrialAPI')

self.config_dict = None
self.preprocessing_model = None

self.__init_experiment_setup(**kwargs)
self.ensemble_solver = None
self.preprocessing = True
self.config_dict = kwargs
self.__init_experiment_setup(**kwargs)

def __init_experiment_setup(self, **kwargs):
self.logger.info('Initialising experiment setup')
self.logger.info('Initialising Industrial Repository')
self.repo = IndustrialModels().setup_repository()
if 'problem' in kwargs.keys():
self.config_dict = kwargs
if 'backend' not in self.config_dict.keys():
self.config_dict['backend'] = 'cpu'
backend_method_current, backend_scipy_current = BackendMethods(self.config_dict['backend']).backend
if 'industrial_preprocessing' in self.config_dict.keys():
del self.config_dict['industrial_preprocessing']
backend_method_current, backend_scipy_current = BackendMethods(self.backend_method).backend
globals()['backend_methods'] = backend_method_current
globals()['backend_scipy'] = backend_scipy_current
del self.config_dict['backend']

def __init_solver(self):
self.logger.info('Initialising Dask Server')
self.dask_client = DaskServer().client
self.logger.info('Initialising solver')
solver = Fedot(**self.config_dict)
self.logger.info('Initialising Industrial Repository')
self.repo = IndustrialModels().setup_repository()
if type(self.config_dict['available_operations']) is not list:
solver = self.config_dict['available_operations'].build()
else:
self.logger.info('Initialising Dask Server')
self.dask_client = DaskServer().client
self.logger.info(f'LinK Dask Server - {self.dask_client.dashboard_link}')
self.logger.info('Initialising solver')
solver = Fedot(**self.config_dict)
return solver

def shutdown(self):
self.dask_client.close()
del self.dask_client

def _preprocessing_strategy(self, input_data):
if self.preprocessing:
if input_data.features.shape[0] > BATCH_SIZE_FOR_FEDOT_WORKER:
self.logger.info('RAF algorithm was applied')
batch_size = round(input_data.features.shape[0] / FEDOT_WORKER_NUM)
batch_timeout = round(self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION)
self.config_dict['timeout'] = batch_timeout
self.logger.info(f'Batch_size - {batch_size}. Number of batches - 5')
self.ensemble_solver = RAFensembler(composing_params=self.config_dict, batch_size=batch_size)
self.logger.info(f'Number of AutoMl models in ensemble - {self.ensemble_solver.n_splits}')
self.ensemble_solver.fit(input_data)
if input_data.features.shape[0] > BATCH_SIZE_FOR_FEDOT_WORKER:
self.logger.info('RAF algorithm was applied')
batch_size = round(input_data.features.shape[0] / FEDOT_WORKER_NUM)
batch_timeout = round(self.config_dict['timeout'] / FEDOT_WORKER_TIMEOUT_PARTITION)
self.config_dict['timeout'] = batch_timeout
self.logger.info(f'Batch_size - {batch_size}. Number of batches - 5')
self.ensemble_solver = RAFensembler(composing_params=self.config_dict, batch_size=batch_size)
self.logger.info(f'Number of AutoMl models in ensemble - {self.ensemble_solver.n_splits}')
self.ensemble_solver.fit(input_data)
self.solver = self.ensemble_solver
else:
self.preprocessing = False

def fit(self,
input_data,
Expand All @@ -141,14 +144,10 @@ def fit(self,
"""

input_data = DataCheck(input_data=input_data, task=self.config_dict['problem']).check_input_data()
self._preprocessing_strategy(input_data)
if self.preprocessing_model is not None:
input_data.features = self.preprocessing_model.predict(input_data).predict
self.logger.info(f'Train Dataset size after preprocessing - {input_data.features.shape}')
self.solver = self.__init_solver()
if self.ensemble_solver is not None:
if self.preprocessing:
self._preprocessing_strategy(input_data)
fitted_pipeline = self.ensemble_solver
self.solver = self.ensemble_solver
else:
fitted_pipeline = self.solver.fit(input_data)
return fitted_pipeline
Expand All @@ -168,10 +167,8 @@ def predict(self,
"""
self.predict_data = DataCheck(input_data=predict_data, task=self.config_dict['problem']).check_input_data()
if self.preprocessing_model is not None:
self.predict_data.features = self.preprocessing_model.predict(self.predict_data).predict
self.logger.info(f'Test Dataset size after preprocessing - {self.predict_data.features.shape}')
return self.solver.predict(self.predict_data)
return self.solver.predict(self.predict_data) if type(self.solver) is Fedot else \
self.solver.predict(self.predict_data, output_mode='labels').predict

def predict_proba(self,
predict_data,
Expand All @@ -187,11 +184,9 @@ def predict_proba(self,
:param predict_data:
"""
self.predict_data = DataCheck(input_data=predict_data, task=self.config_dict['task']).check_input_data()
if self.preprocessing_model is not None:
self.predict_data.features = self.preprocessing_model.predict(predict_data).predict
self.logger.info(f'Test Dataset size after preprocessing - {self.predict_data.features.shape}')
return self.solver.predict_proba(self.predict_data)
self.predict_data = DataCheck(input_data=predict_data, task=self.config_dict['problem']).check_input_data()
return self.solver.predict_proba(self.predict_data) if type(self.solver) is Fedot else \
self.solver.predict(self.predict_data, output_mode='probs').predict

def finetune(self,
train_data,
Expand All @@ -210,18 +205,17 @@ def finetune(self,
"""
train_data = DataCheck(input_data=train_data, task=self.config_dict['problem']).check_input_data()
metric = ClassificationMetricsEnum.accuracy
# tuning_method = partial(SequentialTuner, inverse_node_order=True)
metric = FEDOT_TUNING_METRICS[self.config_dict['problem']]
tuning_method = SimultaneousTuner

if mode == 'head':
head_type = {'regression': 'fedot_regr',
'classification': 'fedot_cls'}
batch_pipelines = [automl_branch for automl_branch in self.current_pipeline.nodes if
automl_branch.name == 'fedot_regr']
pr = PipelineNode(operation_type=head_type[self.config_dict['problem']],
automl_branch.name in FEDOT_ATOMIZE_OPERATION.values()]

pr = PipelineNode(operation_type=FEDOT_HEAD_ENSEMBLE[self.config_dict['problem']],
nodes_from=batch_pipelines,
content={'params': self.config_dict,
'name': 'fedot_regr'})
'name': FEDOT_HEAD_ENSEMBLE[self.config_dict['problem']]})
composed_pipeline = Pipeline(pr)
composed_pipeline.fit(train_data)
self.current_pipeline = composed_pipeline
Expand Down Expand Up @@ -250,7 +244,7 @@ def finetune_predict(self, test_data):
self.predict_data = DataCheck(input_data=test_data, task=self.config_dict['problem']).check_input_data()
return self.current_pipeline.predict(self.predict_data, 'labels').predict

def get_metrics(self, **kwargs) -> dict:
def get_metrics(self, target, labels, probs) -> dict:
"""
Method to obtain Gets quality metrics
Expand All @@ -263,7 +257,7 @@ def get_metrics(self, **kwargs) -> dict:
the dictionary with calculated metrics
"""
return self.solver.get_metrics(**kwargs)
return FEDOT_GET_METRICS[self.config_dict['problem']](target, labels, probs)

def save_predict(self, predicted_data, **kwargs) -> None:
"""
Expand Down Expand Up @@ -408,3 +402,4 @@ def split_ts(self, time_series,
binarize=binarize)

return train_data, test_data

4 changes: 4 additions & 0 deletions fedot_ind/api/utils/checkers_collections.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging

import pandas as pd

from fedot_ind.api.utils.data import check_multivariate_data
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot.core.data.data import InputData
Expand All @@ -24,6 +26,8 @@ def _init_input_data(self):

if type(self.input_data) is tuple:
X, y = self.input_data[0], self.input_data[1]
if type(X) is not pd.DataFrame:
X = pd.DataFrame(X)
is_multivariate_data = check_multivariate_data(X)

if is_multivariate_data:
Expand Down
7 changes: 4 additions & 3 deletions fedot_ind/core/architecture/abstraction/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ class DaskServer(metaclass=Singleton):
def __init__(self):
print('Creating Dask Server')
cluster = LocalCluster(processes=False,
n_workers=6,
threads_per_worker=1,
memory_limit='4GB')
# n_workers=4,
# threads_per_worker=4,
# memory_limit='3GB'
)
# connect client to your cluster
self.client = Client(cluster)
4 changes: 2 additions & 2 deletions fedot_ind/core/architecture/settings/computational.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@


class BackendMethods:
def __init__(self, device_type: str = 'cpu'):
def __init__(self, device_type: str = 'CUDA'):
self.backend = self.define_backend(device_type)

def define_backend(self, device_type: str = 'cpu'):
def define_backend(self, device_type: str = 'CUDA'):
if device_type == 'CUDA':
import cupy, cupyx.scipy.linalg
return cupy, cupyx.scipy.linalg
Expand Down
30 changes: 29 additions & 1 deletion fedot_ind/core/metrics/metrics_implementation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Union

from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, \
mean_squared_error, d2_absolute_error_score, \
median_absolute_error, r2_score
from fedot_ind.core.architecture.settings.computational import backend_methods as np
import pandas as pd
from sklearn.metrics import (accuracy_score, f1_score,
Expand Down Expand Up @@ -152,3 +154,29 @@ def metric(self) -> float:
target = self.target
prediction = self.predicted_labels
return accuracy_score(y_true=target, y_pred=prediction)



def calculate_regression_metric(test_target, labels):
test_target = test_target.astype(np.float)
metric_dict = {'r2_score:': r2_score(test_target, labels),
'mean_squared_error:': mean_squared_error(test_target, labels),
'root_mean_squared_error:': np.sqrt(mean_squared_error(test_target, labels)),
'mean_absolute_error': mean_absolute_error(test_target, labels),
'median_absolute_error': median_absolute_error(test_target, labels),
'explained_variance_score': explained_variance_score(test_target, labels),
'max_error': max_error(test_target, labels),
'd2_absolute_error_score': d2_absolute_error_score(test_target, labels)
}
df = pd.DataFrame.from_dict(metric_dict, orient='index')
return df


def calculate_classification_metric(test_target, labels, probs):

metric_dict = {'accuracy:': Accuracy(test_target, labels, probs).metric(),
'f1': F1(test_target, labels, probs).metric(),
'roc_auc:': ROCAUC(test_target, labels, probs).metric()
}
df = pd.DataFrame.from_dict(metric_dict, orient='index')
return df
13 changes: 11 additions & 2 deletions fedot_ind/core/repository/constanst_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import numpy as np
import pywt
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.metrics_repository import ClassificationMetricsEnum, RegressionMetricsEnum

from fedot_ind.core.metrics.metrics_implementation import calculate_regression_metric, calculate_classification_metric
from fedot_ind.core.models.topological.topofeatures import *
from torch import nn

Expand Down Expand Up @@ -112,8 +114,13 @@ class FedotOperationConstant(Enum):
EXCLUDED_OPERATION = ['fast_ica']
FEDOT_TASK = {'classification': Task(TaskTypesEnum.classification),
'regression': Task(TaskTypesEnum.regression)}
FEDOT_HEAD_ENSEMBLE = {'classification': 'logit',
'regression': 'ridge'}

FEDOT_GET_METRICS = {'regression': calculate_regression_metric,
'classification': calculate_classification_metric}
FEDOT_TUNING_METRICS = {'classification': ClassificationMetricsEnum.accuracy,
'regression': RegressionMetricsEnum.RMSE}
FEDOT_HEAD_ENSEMBLE = {'regression': 'fedot_regr',
'classification': 'fedot_cls'}
FEDOT_ATOMIZE_OPERATION = {'regression': 'fedot_regr',
'classification': 'fedot_cls'}
AVAILABLE_CLS_OPERATIONS = [
Expand Down Expand Up @@ -357,6 +364,8 @@ class BenchmarkDatasets(Enum):
FEDOT_HEAD_ENSEMBLE = FedotOperationConstant.FEDOT_HEAD_ENSEMBLE.value
FEDOT_TASK = FedotOperationConstant.FEDOT_TASK.value
FEDOT_ATOMIZE_OPERATION = FedotOperationConstant.FEDOT_ATOMIZE_OPERATION.value
FEDOT_GET_METRICS = FedotOperationConstant.FEDOT_GET_METRICS.value
FEDOT_TUNING_METRICS = FedotOperationConstant.FEDOT_TUNING_METRICS.value

CPU_NUMBERS = ComputationalConstant.CPU_NUMBERS.value
BATCH_SIZE_FOR_FEDOT_WORKER = ComputationalConstant.BATCH_SIZE_FOR_FEDOT_WORKER.value
Expand Down
Loading

0 comments on commit fc3dce0

Please sign in to comment.