From 62bd8d9512ee18775a39514149b26ad2f1839976 Mon Sep 17 00:00:00 2001
From: ismonnar
Date: Wed, 22 Jan 2025 02:49:22 +0300
Subject: [PATCH] adapted examples for use with the new ApiTemplate

---
 .../big_data/random_sampling_example.py       | 29 ++++++-
 .../optimisation_history_visualisation.py     | 85 +++++++++++++++++--
 .../custom_strategy/multimodal/multimodal.py  | 72 +++++++++++++++-
 .../specific_strategy/LoRa_example.py         | 38 ++++++++-
 .../kernel_ensemble_example.py                | 82 ++++++++++++++++--
 .../probability_calibration_example.py        | 24 +++++-
 6 files changed, 310 insertions(+), 20 deletions(-)

diff --git a/examples/automl_example/custom_strategy/big_data/random_sampling_example.py b/examples/automl_example/custom_strategy/big_data/random_sampling_example.py
index bb8e441cf..cb561f15f 100644
--- a/examples/automl_example/custom_strategy/big_data/random_sampling_example.py
+++ b/examples/automl_example/custom_strategy/big_data/random_sampling_example.py
@@ -1,5 +1,7 @@
 from examples.automl_example.custom_strategy.big_data.big_dataset_utils import create_big_dataset
 from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
+    DEFAULT_AUTOML_LEARNING_CONFIG
 
 cur_params = {'rank': None}
 sampling_algorithm = {'CUR': cur_params}
@@ -8,6 +10,29 @@
 def eval_fedot_on_fold(dataset_name, fold):
     return create_big_dataset(dataset_name, fold)
 
 
+INDUSTRIAL_PARAMS = {'data_type': 'tensor',
+                     'learning_strategy': 'big_dataset',
+                     'sampling_strategy': sampling_algorithm
+                     }
+
+AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
+COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
+AUTOML_CONFIG = {'task': 'classification',
+                 'use_automl': True,
+                 'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
+                                                                     'mutation_strategy': 'growth_mutation_strategy'},
+                                           'optimisation_agent': 'Industrial'}}
+LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
+                   'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
+                   'optimisation_loss': {'quality_loss': 'f1'}}
+INDUSTRIAL_CONFIG = {'problem': 'classification',
+                     'strategy': 'tabular',
+                     'strategy_params': INDUSTRIAL_PARAMS
+                     }
+API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
+              'automl_config': AUTOML_CONFIG,
+              'learning_config': LEARNING_CONFIG,
+              'compute_config': COMPUTE_CONFIG}
 if __name__ == "__main__":
     metric_by_fold = {}
@@ -31,8 +56,8 @@
                       n_jobs=-1,
                       logging_level=20)
     for fold in range(10):
-        dataset_dict = create_big_dataset(dataset_name, fold)
-        result_dict = ApiTemplate(api_config=api_config,
+        dataset_dict = eval_fedot_on_fold(dataset_name, fold)
+        result_dict = ApiTemplate(api_config=API_CONFIG,
                                   metric_list=metric_names).eval(dataset=dataset_dict,
                                                                  finetune=finetune)
         metric_by_fold.update({fold: result_dict})
diff --git a/examples/automl_example/custom_strategy/explainability/optimisation_history_visualisation.py b/examples/automl_example/custom_strategy/explainability/optimisation_history_visualisation.py
index 4faa675c3..d5f4247cc 100644
--- a/examples/automl_example/custom_strategy/explainability/optimisation_history_visualisation.py
+++ b/examples/automl_example/custom_strategy/explainability/optimisation_history_visualisation.py
@@ -1,9 +1,77 @@
+import numpy as np
+from sklearn.utils import shuffle
 from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
+    DEFAULT_AUTOML_LEARNING_CONFIG
+
+def load_data(dataset_dir='C:/Users/Bars/PycharmProjects/Fedot.Industrial/fedot_ind/data/Lightning7'):
+    data_train = np.genfromtxt(dataset_dir + f'/{dataset_name}_TRAIN.txt')
+    data_test = np.genfromtxt(dataset_dir + f'/{dataset_name}_TEST.txt')
+    train_features, train_target = data_train[:, 1:], data_train[:, 0]
+    test_features, test_target = data_test[:, 1:], data_test[:, 0]
+    train_features, train_target = shuffle(train_features, train_target)
+    input_train = (train_features, train_target)
+    input_test = (test_features, test_target)
+
+    dataset = dict(test_data=input_test, train_data=input_train)
+    return dataset
+
+
+def create_feature_generator_strategy():
+    stat_params = {'window_size': 0, 'stride': 1, 'add_global_features': True,
+                   'channel_independent': False, 'use_sliding_window': False}
+    fourier_params = {'low_rank': 5, 'output_format': 'signal', 'compute_heuristic_representation': True,
+                      'approximation': 'smooth', 'threshold': 0.9, 'sampling_rate': 64e3}
+    wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7', 'compute_heuristic_representation': True}
+    rocket_params = {"num_features": 200}
+    sampling_dict = dict(samples=dict(start_idx=0,
+                                      end_idx=None),
+                         channels=dict(start_idx=0,
+                                       end_idx=None),
+                         elements=dict(start_idx=0,
+                                       end_idx=None))
+    feature_generator = {
+        # 'minirocket': [('minirocket_extractor', rocket_params)],
+        'stat_generator': [('quantile_extractor', stat_params)],
+        'fourier': [('fourier_basis', fourier_params)],
+        'wavelet': [('wavelet_basis', wavelet_params)],
+    }
+    return feature_generator, sampling_dict
+
+
+feature_generator, sampling_dict = create_feature_generator_strategy()
+
+INDUSTRIAL_PARAMS = {'feature_generator': feature_generator,
+                     'data_type': 'tensor',
+                     'learning_strategy': 'ts2tabular',
+                     'sampling_strategy': sampling_dict
+                     }
+
+# DEFINE ALL CONFIG FOR API
+AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
+COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
+AUTOML_CONFIG = {'task': 'classification',
+                 'use_automl': True,
+                 'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
+                                                                     'mutation_strategy': 'growth_mutation_strategy'},
+                                           'optimisation_agent': 'Industrial'}}
+LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
+                   'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
+                   'optimisation_loss': {'quality_loss': 'f1'}}
+INDUSTRIAL_CONFIG = {'problem': 'classification',
+                     'strategy': 'tabular',
+                     'strategy_params': INDUSTRIAL_PARAMS
+                     }
+API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
+              'automl_config': AUTOML_CONFIG,
+              'learning_config': LEARNING_CONFIG,
+              'compute_config': COMPUTE_CONFIG}
 
 if __name__ == "__main__":
     dataset_name = 'Lightning7'
+    dataset = load_data()
     finetune = False
-    metric_names = ('f1', 'accuracy')
+    metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
     api_config = dict(problem='classification',
                       metric='f1',
                       timeout=5,
@@ -13,10 +81,11 @@
                       n_jobs=-1,
                       logging_level=10)
 
-    result_dict = ApiTemplate(api_config=api_config,
-                              metric_list=('f1', 'accuracy')).eval(dataset=dataset_name,
-                                                                   finetune=finetune)
-
-    opt_hist = result_dict['industrial_model'].save_optimization_history(return_history=True)
-    opt_hist = result_dict['industrial_model'].vis_optimisation_history(
-        opt_history_path=opt_hist, return_history=True)
+    result_dict = ApiTemplate(api_config=API_CONFIG, metric_list=metric_names).eval(dataset=dataset, finetune=finetune)
+    metrics = result_dict['metrics']
+    metrics.to_csv('./metrics.csv')
+    hist = result_dict['industrial_model'].save_optimization_history(return_history=True)
+    result_dict['industrial_model'].vis_optimisation_history(hist)
+    result_dict['industrial_model'].save_best_model()
+    result_dict['industrial_model'].solver.current_pipeline.show()
+    _ = 1
diff --git a/examples/automl_example/custom_strategy/multimodal/multimodal.py b/examples/automl_example/custom_strategy/multimodal/multimodal.py
index d7409ba37..b18273c3b 100644
--- a/examples/automl_example/custom_strategy/multimodal/multimodal.py
+++ b/examples/automl_example/custom_strategy/multimodal/multimodal.py
@@ -1,7 +1,75 @@
+import numpy as np
+from sklearn.utils import shuffle
 from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
+    DEFAULT_AUTOML_LEARNING_CONFIG
+
+def load_data(dataset_dir='C:/Users/Bars/PycharmProjects/Fedot.Industrial/fedot_ind/data/Lightning7'):
+    data_train = np.genfromtxt(dataset_dir + f'/{dataset_name}_TRAIN.txt')
+    data_test = np.genfromtxt(dataset_dir + f'/{dataset_name}_TEST.txt')
+    train_features, train_target = data_train[:, 1:], data_train[:, 0]
+    test_features, test_target = data_test[:, 1:], data_test[:, 0]
+    train_features, train_target = shuffle(train_features, train_target)
+    input_train = (train_features, train_target)
+    input_test = (test_features, test_target)
+
+    dataset = dict(test_data=input_test, train_data=input_train)
+    return dataset
+
+
+def create_feature_generator_strategy():
+    stat_params = {'window_size': 0, 'stride': 1, 'add_global_features': True,
+                   'channel_independent': False, 'use_sliding_window': False}
+    fourier_params = {'low_rank': 5, 'output_format': 'signal', 'compute_heuristic_representation': True,
+                      'approximation': 'smooth', 'threshold': 0.9, 'sampling_rate': 64e3}
+    wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7', 'compute_heuristic_representation': True}
+    rocket_params = {"num_features": 200}
+    sampling_dict = dict(samples=dict(start_idx=0,
+                                      end_idx=None),
+                         channels=dict(start_idx=0,
+                                       end_idx=None),
+                         elements=dict(start_idx=0,
+                                       end_idx=None))
+    feature_generator = {
+        # 'minirocket': [('minirocket_extractor', rocket_params)],
+        'stat_generator': [('quantile_extractor', stat_params)],
+        'fourier': [('fourier_basis', fourier_params)],
+        'wavelet': [('wavelet_basis', wavelet_params)],
+    }
+    return feature_generator, sampling_dict
+
+
+feature_generator, sampling_dict = create_feature_generator_strategy()
+
+INDUSTRIAL_PARAMS = {'feature_generator': feature_generator,
+                     'data_type': 'tensor',
+                     'learning_strategy': 'ts2tabular',
+                     'sampling_strategy': sampling_dict
+                     }
+
+# DEFINE ALL CONFIG FOR API
+AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
+COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
+AUTOML_CONFIG = {'task': 'classification',
+                 'use_automl': True,
+                 'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
+                                                                     'mutation_strategy': 'growth_mutation_strategy'},
+                                           'optimisation_agent': 'Industrial'}}
+LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
+                   'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
+                   'optimisation_loss': {'quality_loss': 'f1'}}
+INDUSTRIAL_CONFIG = {'problem': 'classification',
+                     'strategy': 'tabular',
+                     'strategy_params': INDUSTRIAL_PARAMS
+                     }
+API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
+              'automl_config': AUTOML_CONFIG,
+              'learning_config': LEARNING_CONFIG,
+              'compute_config': COMPUTE_CONFIG}
 
 if __name__ == "__main__":
     dataset_name = 'Lightning7'
+    dataset = load_data()
     finetune = False
     metric_names = ('f1', 'accuracy')
     multimodal_pipeline = {'recurrence_extractor': {
@@ -24,8 +92,8 @@
                       n_jobs=-1,
                       logging_level=10)
 
-    result_dict = ApiTemplate(api_config=api_config,
-                              metric_list=('f1', 'accuracy')).eval(dataset=dataset_name,
+    result_dict = ApiTemplate(api_config=API_CONFIG,
+                              metric_list=metric_names).eval(dataset=dataset,
                                                              finetune=finetune,
                                                              initial_assumption=multimodal_pipeline)
     result_dict['industrial_model'].explain(explain_config)
diff --git a/examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py b/examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py
index febccf1e5..5256725e5 100644
--- a/examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py
+++ b/examples/automl_example/custom_strategy/specific_strategy/LoRa_example.py
@@ -1,5 +1,8 @@
 import torchvision.datasets as datasets
 import torchvision.transforms as transforms
+from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
+    DEFAULT_AUTOML_LEARNING_CONFIG
 
 from fedot_ind.api.main import FedotIndustrial
 
@@ -38,7 +41,40 @@
                          industrial_strategy_params=lora_params,
                          logging_level=20)
 
-industrial = FedotIndustrial(**api_config)
+AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
+COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
+AUTOML_CONFIG = {'task': 'classification',
+                 'use_automl': True,
+                 'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
+                                                                     'mutation_strategy': 'growth_mutation_strategy'},
+                                           'optimisation_agent': 'Industrial'}}
+
+LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
+                   'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
+                   'optimisation_loss': {'quality_loss': 'accuracy'}}
+
+INDUSTRIAL_PARAMS = {'rank': 2,
+                     'sampling_share': 0.5,
+                     'lora_init': 'random',
+                     'epochs': 1,
+                     'batch_size': 10,
+                     'data_type': 'tensor'
+                     }
+
+INDUSTRIAL_CONFIG = {'problem': 'classification',
+                     'strategy': 'lora_strategy',
+                     'strategy_params': INDUSTRIAL_PARAMS
+                     }
+
+API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
+              'automl_config': AUTOML_CONFIG,
+              'learning_config': LEARNING_CONFIG,
+              'compute_config': COMPUTE_CONFIG}
+
+dataset = dict(test_data=test_data, train_data=train_data)
+
+industrial = ApiTemplate(api_config=API_CONFIG,
+                         metric_list=metric_names).eval(dataset=dataset)
 industrial.fit(train_data)
 predict = industrial.predict(test_data)
 _ = 1
diff --git a/examples/automl_example/custom_strategy/specific_strategy/kernel_ensemble_example.py b/examples/automl_example/custom_strategy/specific_strategy/kernel_ensemble_example.py
index b2b1415b1..93f1095cb 100644
--- a/examples/automl_example/custom_strategy/specific_strategy/kernel_ensemble_example.py
+++ b/examples/automl_example/custom_strategy/specific_strategy/kernel_ensemble_example.py
@@ -1,7 +1,77 @@
 from fedot_ind.api.main import FedotIndustrial
 from fedot_ind.tools.loader import DataLoader
+import numpy as np
+from sklearn.utils import shuffle
+from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
+    DEFAULT_AUTOML_LEARNING_CONFIG
+
+def load_data(dataset_dir='C:/Users/Bars/PycharmProjects/Fedot.Industrial/fedot_ind/data/Lightning7'):
+    data_train = np.genfromtxt(dataset_dir + f'/{dataset_name}_TRAIN.txt')
+    data_test = np.genfromtxt(dataset_dir + f'/{dataset_name}_TEST.txt')
+    train_features, train_target = data_train[:, 1:], data_train[:, 0]
+    test_features, test_target = data_test[:, 1:], data_test[:, 0]
+    train_features, train_target = shuffle(train_features, train_target)
+    input_train = (train_features, train_target)
+    input_test = (test_features, test_target)
+
+    dataset = dict(test_data=input_test, train_data=input_train)
+    return dataset
+
+
+def create_feature_generator_strategy():
+    stat_params = {'window_size': 0, 'stride': 1, 'add_global_features': True,
+                   'channel_independent': False, 'use_sliding_window': False}
+    fourier_params = {'low_rank': 5, 'output_format': 'signal', 'compute_heuristic_representation': True,
+                      'approximation': 'smooth', 'threshold': 0.9, 'sampling_rate': 64e3}
+    wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7', 'compute_heuristic_representation': True}
+    rocket_params = {"num_features": 200}
+    sampling_dict = dict(samples=dict(start_idx=0,
+                                      end_idx=None),
+                         channels=dict(start_idx=0,
+                                       end_idx=None),
+                         elements=dict(start_idx=0,
+                                       end_idx=None))
+    feature_generator = {
+        # 'minirocket': [('minirocket_extractor', rocket_params)],
+        'stat_generator': [('quantile_extractor', stat_params)],
+        'fourier': [('fourier_basis', fourier_params)],
+        'wavelet': [('wavelet_basis', wavelet_params)],
+    }
+    return feature_generator, sampling_dict
+
+
+feature_generator, sampling_dict = create_feature_generator_strategy()
+
+INDUSTRIAL_PARAMS = {'feature_generator': feature_generator,
+                     'data_type': 'tensor',
+                     'learning_strategy': 'all_classes',
+                     'head_model': 'rf',
+                     'sampling_strategy': sampling_dict
+                     }
+
+# DEFINE ALL CONFIG FOR API
+AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
+COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
+AUTOML_CONFIG = {'task': 'classification',
+                 'use_automl': True,
+                 'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
+                                                                     'mutation_strategy': 'growth_mutation_strategy'},
+                                           'optimisation_agent': 'Industrial'}}
+LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
+                   'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
+                   'optimisation_loss': {'quality_loss': 'f1'}}
+INDUSTRIAL_CONFIG = {'problem': 'classification',
+                     'strategy': 'kernel_automl',
+                     'strategy_params': INDUSTRIAL_PARAMS
+                     }
+API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
+              'automl_config': AUTOML_CONFIG,
+              'learning_config': LEARNING_CONFIG,
+              'compute_config': COMPUTE_CONFIG}
 
 dataset_name = 'Lightning7'
+dataset = load_data()
 metric_names = ('f1', 'accuracy')
 api_config = dict(
     problem='classification',
@@ -19,10 +89,10 @@
     logging_level=20)
 
 if __name__ == "__main__":
-    train_data, test_data = DataLoader(dataset_name).load_data()
-    industrial = FedotIndustrial(**api_config)
-    industrial.fit(train_data)
-    predict = industrial.predict(test_data, 'ensemble')
-    predict_proba = industrial.predict_proba(test_data, 'ensemble')
-    metric = industrial.get_metrics(target=test_data[1],
+    industrial = ApiTemplate(api_config=API_CONFIG,
+                             metric_list=metric_names).eval(dataset=dataset)
+    industrial.fit(dataset.get("train_data"))
+    predict = industrial.predict(dataset.get("test_data"), 'ensemble')
+    predict_proba = industrial.predict_proba(dataset.get("test_data"), 'ensemble')
+    metric = industrial.get_metrics(target=dataset.get("test_data")[1],
                                     metric_names=metric_names)
diff --git a/examples/automl_example/custom_strategy/specific_strategy/probability_calibration_example.py b/examples/automl_example/custom_strategy/specific_strategy/probability_calibration_example.py
index 5f67e5143..0ce6ab8bc 100644
--- a/examples/automl_example/custom_strategy/specific_strategy/probability_calibration_example.py
+++ b/examples/automl_example/custom_strategy/specific_strategy/probability_calibration_example.py
@@ -1,6 +1,28 @@
 import numpy as np
 
 from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
+from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
+    DEFAULT_AUTOML_LEARNING_CONFIG
+
+INDUSTRIAL_PARAMS = {'data_type': 'tensor',
+                     'learning_strategy': 'ts2tabular'
+                     }
+
+# DEFINE ALL CONFIG FOR API
+AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
+COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
+AUTOML_CONFIG = {'task': 'classification'}
+LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
+                   'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
+                   'optimisation_loss': {'quality_loss': 'f1'}}
+INDUSTRIAL_CONFIG = {'problem': 'classification',
+                     'strategy': 'tabular',
+                     'strategy_params': INDUSTRIAL_PARAMS
+                     }
+API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
+              'automl_config': AUTOML_CONFIG,
+              'learning_config': LEARNING_CONFIG,
+              'compute_config': COMPUTE_CONFIG}
 
 if __name__ == "__main__":
     dataset_name = 'Libras'
@@ -10,7 +32,7 @@
                       timeout=0.1,
                       n_jobs=2,
                       logging_level=20)
-    api_client = ApiTemplate(api_config=api_config,
+    api_client = ApiTemplate(api_config=API_CONFIG,
                              metric_list=('f1', 'accuracy'))
     result_dict = api_client.eval(dataset=dataset_name, finetune=finetune)
     uncalibrated_labels, uncalibrated_probs = result_dict['industrial_model'].predicted_labels, \