pre-release fixes in extractors and industrial repo init
v1docq committed Dec 22, 2023
1 parent 419dcb8 commit 2a79ed6
Showing 9 changed files with 200 additions and 46 deletions.
15 changes: 15 additions & 0 deletions benchmark/abstract_bench.py
@@ -58,6 +58,8 @@ def evaluate_loop(self, dataset, experiment_setup: dict = None):
matplotlib.use('TkAgg')
train_data, test_data = DataLoader(dataset_name=dataset).load_data()
experiment_setup['output_folder'] = experiment_setup['output_folder'] + f'/{dataset}'
if 'tuning_params' in experiment_setup.keys():
del experiment_setup['tuning_params']
model = FedotIndustrial(**experiment_setup)
model.fit(train_data)
prediction = model.predict(test_data)
@@ -71,6 +73,19 @@ def evaluate_loop(self, dataset, experiment_setup: dict = None):
print('No_visualisation')
gc.collect()
return prediction, model.predict_data.target

def finetune_loop(self, dataset, experiment_setup: dict = None):
train_data, test_data = DataLoader(dataset_name=dataset).load_data()
experiment_setup['output_folder'] = experiment_setup['output_folder'] + f'/{dataset}'
tuning_params = experiment_setup['tuning_params']
del experiment_setup['tuning_params']
model = FedotIndustrial(**experiment_setup)
model.load(path=experiment_setup['output_folder'] + '/0_pipeline_saved')
model.finetune(train_data, tuning_params=tuning_params)
prediction = model.finetune_predict(test_data)
gc.collect()
return prediction, model.predict_data.target
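
A side note on the setup handling in finetune_loop: reading 'tuning_params' and then deleting it can be collapsed into one dict.pop call. A minimal equivalent sketch, using the same names as above:

tuning_params = experiment_setup.pop('tuning_params')  # read and remove in one step
model = FedotIndustrial(**experiment_setup)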

def collect_results(self, output_dir):
"""Collect the results of the benchmark.
27 changes: 25 additions & 2 deletions benchmark/benchmark_TSC.py
@@ -55,7 +55,8 @@ def run(self):
metric = Accuracy(target, prediction).metric()
metric_dict.update({dataset_name: metric})
basic_results.loc[dataset_name, 'Fedot_Industrial'] = metric
- dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}', 'metrics_report.csv')
+ dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
+                             'metrics_report.csv')
basic_results.to_csv(dataset_path)
except Exception:
print('Skip dataset')
@@ -64,6 +65,29 @@ def run(self):
basic_results.to_csv(basic_path)
self.logger.info("Benchmark test finished")

def finetune(self):
self.logger.info('Benchmark finetune started')
for dataset_name in self.custom_datasets:
try:
composed_model_path = PROJECT_PATH + self.path_to_save + f'/{dataset_name}' + '/0_pipeline_saved'
if os.path.isdir(composed_model_path):
self.experiment_setup['output_folder'] = PROJECT_PATH + self.path_to_save
experiment_setup = deepcopy(self.experiment_setup)
prediction, target = self.finetune_loop(dataset_name, experiment_setup)
metric = Accuracy(target, prediction).metric()
dataset_path = os.path.join(self.experiment_setup['output_folder'], f'{dataset_name}',
'metrics_report.csv')
fedot_results = pd.read_csv(dataset_path, index_col=0)
fedot_results.loc[dataset_name, 'Fedot_Industrial_finetuned'] = metric

fedot_results.to_csv(dataset_path)
else:
print(f"No composed model for dataset - {dataset_name}")
except Exception:
print('Skip dataset')
gc.collect()
self.logger.info("Benchmark finetune finished")

def load_local_basic_results(self, path: str = None):
if path is None:
path = PROJECT_PATH + self.path_to_result
@@ -83,4 +107,3 @@ def load_web_results(self):
sota_results = get_estimator_results(estimators=sota_estimators['classification'].values.tolist())
sota_results_df = pd.DataFrame(sota_results)
return sota_results_df

74 changes: 39 additions & 35 deletions examples/benchmark/time_series_uni_clf_benchmark.py
@@ -27,6 +27,9 @@
'initial_assumption': None,
'max_pipeline_fit_time': 10,
'with_tuning': False,
'tuning_params': {'tuning_timeout': 10,
'tuning_iterations': 1000,
'tuning_early_stop': 50},
'early_stopping_iterations': 5,
'early_stopping_timeout': 90,
'optimizer': IndustrialEvoOptimizer}
@@ -36,41 +39,41 @@
custom_datasets=[
# "Beef",
# "BeetleFly",
"BirdChicken",
"BME",
"Car",
"CBF",
"Chinatown",
"ChlorineConcentration",
"CinCECGTorso",
"Coffee",
"Computers",
"CricketX",
"CricketY",
"CricketZ",
"Crop",
"DiatomSizeReduction",
"DistalPhalanxOutlineCorrect",
"DistalPhalanxOutlineAgeGroup",
"DistalPhalanxTW",
"Earthquakes",
"ECG200",
"ECG5000",
"ECGFiveDays",
"ElectricDevices",
"EOGHorizontalSignal",
"EOGVerticalSignal",
"EthanolLevel",
"FaceAll",
"FaceFour",
"FacesUCR",
"FiftyWords",
"Fish",
"FordA",
"FordB",
"FreezerRegularTrain",
"FreezerSmallTrain",
"Fungi",
# "BirdChicken",
# "BME",
# "Car",
# "CBF",
# "Chinatown",
# "ChlorineConcentration",
# "CinCECGTorso",
# "Coffee",
# "Computers",
# "CricketX",
# "CricketY",
# "CricketZ",
# "Crop",
# "DiatomSizeReduction",
# "DistalPhalanxOutlineCorrect",
# "DistalPhalanxOutlineAgeGroup",
# "DistalPhalanxTW",
# "Earthquakes",
# "ECG200",
# "ECG5000",
# "ECGFiveDays",
# "ElectricDevices",
# "EOGHorizontalSignal",
# "EOGVerticalSignal",
# "EthanolLevel",
# "FaceAll",
# "FaceFour",
# "FacesUCR",
# "FiftyWords",
# "Fish",
# "FordA",
# "FordB",
# "FreezerRegularTrain",
# "FreezerSmallTrain",
# "Fungi",
"GunPoint",
"GunPointAgeSpan",
"GunPointMaleVersusFemale",
@@ -147,3 +150,4 @@
],
use_small_datasets=True)
benchmark.run()
#benchmark.finetune()
52 changes: 49 additions & 3 deletions fedot_ind/api/main.py
@@ -1,4 +1,5 @@
import logging
from functools import partial
from pathlib import Path
from typing import Tuple, Union

@@ -7,7 +8,10 @@
from fedot.api.main import Fedot
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from golem.core.tuning.simultaneous import SimultaneousTuner
from golem.core.tuning.sequential import SequentialTuner
from fedot_ind.api.utils.checkers_collections import DataCheck
from fedot_ind.api.utils.path_lib import default_path_to_save_results
from fedot_ind.core.operation.transformation.splitter import TSTransformer
@@ -98,7 +102,6 @@ def _preprocessing_strategy(self, input_data):
self.logger.info(f'Dataset size before preprocessing - {input_data.features.shape}')
self.logger.info('PCA transformation was applied to input data due to dataset size')
if len(input_data.features.shape) == 3:
- #self.preprocessing_model = PipelineBuilder().add_node('eigen_basis', params={'tensor_approximation': True}).build()
self.preprocessing_model = PipelineBuilder().add_node('pca', params={'n_components': 0.9}).build()
else:
self.preprocessing_model = PipelineBuilder().add_node('pca', params={'n_components': 0.9}).build()
@@ -161,6 +164,48 @@ def predict_proba(self, predict_data, **kwargs) -> np.ndarray:
self.logger.info(f'Test Dataset size after preprocessing - {self.predict_data.features.shape}')
return self.solver.predict_proba(self.predict_data)

def finetune(self, train_data, tuning_params) -> None:
"""
Method to fine-tune the composed Industrial pipeline via hyperparameter tuning.
Args:
train_data: raw train data
tuning_params: dict with tuning settings ('tuning_timeout', 'tuning_iterations', 'tuning_early_stop')
"""
train_data = DataCheck(input_data=train_data, task=self.config_dict['problem']).check_input_data()
if train_data.num_classes > 2:
metric = ClassificationMetricsEnum.f1
else:
metric = ClassificationMetricsEnum.accuracy
# alternative: tuning_method = partial(SequentialTuner, inverse_node_order=True)
tuning_method = SimultaneousTuner
pipeline_tuner = TunerBuilder(train_data.task) \
.with_tuner(tuning_method) \
.with_metric(metric) \
.with_timeout(tuning_params['tuning_timeout']) \
.with_early_stopping_rounds(tuning_params['tuning_early_stop']) \
.with_iterations(tuning_params['tuning_iterations']) \
.build(train_data)
self.current_pipeline = pipeline_tuner.tune(self.current_pipeline)
self.current_pipeline.fit(train_data)

def finetune_predict(self, test_data) -> np.ndarray:
"""
Method to obtain predictions from the fine-tuned Industrial model.
Args:
test_data: raw test data
Returns:
the array with predicted labels
"""
self.predict_data = DataCheck(input_data=test_data, task=self.config_dict['problem']).check_input_data()
return self.current_pipeline.predict(self.predict_data, 'labels').predict
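
Taken together, load, finetune and finetune_predict form the new fine-tuning entry point added in this commit. A minimal usage sketch, assuming a pipeline was already composed and saved (paths and tuning values are illustrative):

model = FedotIndustrial(**experiment_setup)
model.load(path=experiment_setup['output_folder'] + '/0_pipeline_saved')
model.finetune(train_data, tuning_params={'tuning_timeout': 10,
                                          'tuning_iterations': 1000,
                                          'tuning_early_stop': 50})
labels = model.finetune_predict(test_data)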

def get_metrics(self, **kwargs) -> dict:
"""
Method to obtain quality metrics
@@ -210,7 +255,8 @@ def load(self, path):
path (str): path to the model
"""
- raise NotImplementedError()
+ self.current_pipeline = Pipeline(use_input_preprocessing=self.solver.params.get('use_input_preprocessing'))
+ self.current_pipeline.load(path)

def save_optimization_history(self, **kwargs):
"""Plot prediction of the model"""
2 changes: 1 addition & 1 deletion fedot_ind/core/models/base_extractor.py
@@ -106,7 +106,7 @@ def get_statistical_features(self,

for method in list_of_methods:
try:
- features.append(method[1](time_series))
+ features.append(method[1](time_series).reshape(-1,1))
names.append(method[0])
except ValueError:
continue
2 changes: 1 addition & 1 deletion fedot_ind/core/models/signal/signal_extractor.py
@@ -24,7 +24,7 @@ def __init__(self, params: Optional[OperationParameters] = None):

def _transform(self, input_data: InputData) -> np.array:
wavelet_basis = self.wavelet_basis({'n_components': self.n_components,
-                                    'wavelet': self.wavelet})
+                                    'wavelet':self.wavelet})
transformed_features = wavelet_basis.transform(input_data)
predict = self._clean_predict(transformed_features.predict)
return predict
@@ -77,9 +77,10 @@ def _transform(self, input_data: Union[InputData, pd.DataFrame]) -> np.array:
features = np.array(ListMonad(*input_data.features.tolist()).value)
else:
features = np.array(ListMonad(*input_data.tolist()).value)
#features = np.array([series[~np.isnan(series)] for series in features])
if len(features.shape) == 2 and features.shape[1] == 1:
features = features.reshape(1, -1)
elif len(features.shape) == 3 and features.shape[1] == 1:
features = features.squeeze()
parallel = Parallel(n_jobs=self.n_processes, verbose=0, pre_dispatch="2*n_jobs")
v = parallel(delayed(self._transform_one_sample)(sample) for sample in features)
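
The reshapes above normalise the input to a 2-D array of series before the parallel map: a single-column 2-D array is treated as one long series, while a 3-D array with a singleton channel axis is squeezed back to 2-D. A small illustrative check with numpy (shapes are arbitrary):

import numpy as np

single = np.zeros((100, 1))        # one series of length 100
assert single.reshape(1, -1).shape == (1, 100)

batch = np.zeros((32, 1, 100))     # 32 univariate series
assert batch.squeeze().shape == (32, 100)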

48 changes: 48 additions & 0 deletions fedot_ind/core/repository/initializer_industrial_models.py
@@ -3,10 +3,16 @@
from enum import Enum
from typing import Sequence
from random import choice, sample
from typing import List, Iterable, Union, Optional

import numpy as np

from fedot.core.data.array_utilities import atleast_4d

from fedot.api.api_utils.api_composer import ApiComposer
from fedot.api.api_utils.api_params_repository import ApiParamsRepository
from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation
from fedot.core.data.merge.data_merger import ImageDataMerger
from fedot.core.pipelines.adapters import PipelineAdapter
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
@@ -309,6 +315,46 @@ def has_no_data_flow_conflicts_in_industrial_pipeline(pipeline: Pipeline):
return True


def preprocess_predicts(*args) -> List[np.array]:
predicts = args[1]
if len(predicts[0].shape) <= 3:
return predicts
else:
reshaped_predicts = list(map(atleast_4d, predicts))

# And check image sizes
img_wh = [predict.shape[1:3] for predict in reshaped_predicts]
invalid_sizes = len(set(img_wh)) > 1 # Can merge only images of the same size
if invalid_sizes:
raise ValueError("Can't merge images of different sizes: " + str(img_wh))
return reshaped_predicts


def merge_predicts(*args) -> np.array:
predicts = args[1]
predicts = [x.reshape(-1, 1) if len(x.shape) == 1 else x for x in predicts]

channel_shape, elem_shape = [(x.shape[1], x.shape[2]) if len(x.shape) > 2 else (1, x.shape[0]) for x in predicts][0]

channel_concat = [x.shape[1] == channel_shape if len(x.shape) > 2
                  else 1 == channel_shape for x in predicts]

element_wise_concat = [x.shape[2] == elem_shape if len(x.shape) > 2
else x.shape[1] == elem_shape for x in predicts]

if all(channel_concat) and all(element_wise_concat):
try:
return np.concatenate(predicts, axis=1)
except Exception:
return np.concatenate(predicts, axis=0)
elif not all(channel_concat) and not all(element_wise_concat):
prediction_2d = np.concatenate([x.reshape(x.shape[0], x.shape[1] * x.shape[2]) if len(x.shape) > 2
else x for x in predicts], axis=1)
return prediction_2d.reshape(prediction_2d.shape[0], 1, prediction_2d.shape[1])
else:
return np.concatenate(predicts, axis=1)
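
To make the branching in merge_predicts concrete: when every predict agrees on both the channel and the element dimension, the arrays are stacked along the channel axis; when both dimensions disagree, each predict is flattened to 2-D, concatenated feature-wise, and restored as a single-channel 3-D array. A small worked example with illustrative shapes:

import numpy as np

a = np.ones((16, 3, 8))
b = np.ones((16, 3, 8))
np.concatenate([a, b], axis=1).shape  # (16, 6, 8): both dims match, channel concat

c = np.ones((16, 2, 4))               # neither dim matches `a`
flat = np.concatenate([x.reshape(x.shape[0], -1) for x in (a, c)], axis=1)
flat.reshape(flat.shape[0], 1, flat.shape[1]).shape  # (16, 1, 32)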


class IndustrialModels:
def __init__(self):
self.industrial_data_operation_path = pathlib.Path(PROJECT_PATH, 'fedot_ind',
@@ -341,6 +387,8 @@ def setup_repository(self):

setattr(PipelineSearchSpace, "get_parameters_dict", get_industrial_search_space)
setattr(ApiParamsRepository, "_get_default_mutations", _get_default_industrial_mutations)
setattr(ImageDataMerger, "preprocess_predicts", preprocess_predicts)
setattr(ImageDataMerger, "merge_predicts", merge_predicts)
class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
MutationStrengthEnum = MutationStrengthEnumIndustrial
# common_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
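
setup_repository swaps FEDOT behaviour at runtime by assigning replacement functions onto the imported classes, which is why module-level functions such as merge_predicts receive self through *args. A minimal sketch of the same setattr monkey-patching pattern with a toy class (not the real FEDOT API):

class Merger:
    def merge_predicts(self, predicts):
        return predicts[0]            # stock behaviour

def industrial_merge(self, predicts):
    return sum(predicts)              # replacement behaviour

setattr(Merger, 'merge_predicts', industrial_merge)
assert Merger().merge_predicts([1, 2, 3]) == 6  # every instance now uses the patch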
23 changes: 20 additions & 3 deletions fedot_ind/core/tuning/search_space.py
@@ -11,7 +11,7 @@
'wavelet_basis':
{'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]},
'wavelet': {'hyperopt-dist': hp.choice,
-              'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}},
+              'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}},
'fourier_basis':
{'threshold': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [10000, 50000]}},
'topological_extractor':
@@ -26,8 +26,9 @@
# 'rec_metric': (hp.choice, [['chebyshev', 'cosine', 'euclidean', 'mahalanobis']])
},
'signal_extractor':
- {'wavelet': {'hyperopt-dist': hp.choice,
-              'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}},
+ {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]},
+  'wavelet': {'hyperopt-dist': hp.choice,
+              'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}},
'minirocket_extractor':
{'num_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}},
'patch_tst_model':
@@ -320,6 +321,22 @@ def get_industrial_search_space(self):
'sampling-scope': [1, 365],
'type': 'discrete'}
},
'mlp': {
'hidden_layer_sizes': {
'hyperopt-dist': hp.choice,
'sampling-scope': [[(256, 128, 64, 32), (1028, 512, 64,)]],
'type': 'categorical'},
'activation': {
'hyperopt-dist': hp.choice,
'sampling-scope': [['logistic', 'tanh', 'relu']],
'type': 'categorical'},
'max_iter': {'hyperopt-dist': hp.uniformint,
'sampling-scope': [1000, 2000],
'type': 'discrete'},
'learning_rate': {'hyperopt-dist': hp.choice,
'sampling-scope': [['constant', 'adaptive']],
'type': 'categorical'}
},
'ar': {
'lag_1': {
'hyperopt-dist': hp.uniform,
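
Each search-space entry couples a hyperopt distribution with its sampling scope. A minimal sketch of drawing one random configuration from the new 'mlp' block (assuming the hyperopt package; the labels are arbitrary):

from hyperopt import hp
from hyperopt.pyll.stochastic import sample

mlp_space = {
    'hidden_layer_sizes': hp.choice('hls', [(256, 128, 64, 32), (1028, 512, 64)]),
    'activation': hp.choice('act', ['logistic', 'tanh', 'relu']),
    'max_iter': hp.uniformint('iter', 1000, 2000),
    'learning_rate': hp.choice('lr', ['constant', 'adaptive']),
}
print(sample(mlp_space))  # one candidate, e.g. {'activation': 'tanh', 'max_iter': 1542, ...}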
