Skip to content

Commit

Permalink
feat: add feature generator strategy creator function in example util…
Browse files Browse the repository at this point in the history
…s, refactor examples
  • Loading branch information
Lopa10ko committed Jan 22, 2025
1 parent fb549e0 commit 003ae52
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 277 deletions.
Original file line number Diff line number Diff line change
@@ -1,65 +1,34 @@
from examples.automl_example.custom_strategy.big_data.big_dataset_utils import create_big_dataset
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
DEFAULT_AUTOML_LEARNING_CONFIG

cur_params = {'rank': None}
sampling_algorithm = {'CUR': cur_params}


def eval_fedot_on_fold(dataset_name, fold):
    """Prepare the data for a single fold of the given dataset.

    Thin wrapper: forwards both arguments to ``create_big_dataset`` and
    returns its result untouched.

    Args:
        dataset_name: Name of the dataset, passed through as-is.
        fold: Fold identifier, passed through as-is.

    Returns:
        Whatever ``create_big_dataset(dataset_name, fold)`` returns.
    """
    fold_dataset = create_big_dataset(dataset_name, fold)
    return fold_dataset
from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, DEFAULT_AUTOML_LEARNING_CONFIG, \
DEFAULT_CLF_AUTOML_CONFIG

DATASET_NAME = 'airlines'
METRIC_NAMES = ('f1', 'accuracy', 'precision', 'roc_auc')

INDUSTRIAL_PARAMS = {'data_type': 'tensor',
'learning_strategy': 'big_dataset',
'sampling_strategy': sampling_algorithm
}
'sampling_strategy': {'CUR': {'rank': None}}}

AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
AUTOML_CONFIG = {'task': 'classification',
'use_automl': True,
'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
'mutation_strategy': 'growth_mutation_strategy'},
'optimisation_agent': 'Industrial'}}
LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
'learning_strategy_params': DEFAULT_AUTOML_LEARNING_CONFIG,
'optimisation_loss': {'quality_loss': 'f1'}}

INDUSTRIAL_CONFIG = {'problem': 'classification',
'strategy': 'tabular',
'strategy_params': INDUSTRIAL_PARAMS
}
'strategy_params': INDUSTRIAL_PARAMS}

API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
'automl_config': AUTOML_CONFIG,
'automl_config': DEFAULT_CLF_AUTOML_CONFIG,
'learning_config': LEARNING_CONFIG,
'compute_config': COMPUTE_CONFIG}
'compute_config': DEFAULT_COMPUTE_CONFIG}

if __name__ == "__main__":
metric_by_fold = {}
finetune = False
metric_names = ('f1', 'accuracy')
dataset_name = 'airlines'
api_config = dict(problem='classification',
metric='f1',
timeout=20,
pop_size=3,
early_stopping_iterations=10,
early_stopping_timeout=30,
optimizer_params={'mutation_agent': 'bandit',
'mutation_strategy': 'growth_mutation_strategy'},
with_tunig=False,
preset='classification_tabular',
industrial_strategy_params={'data_type': 'tensor',
'learning_strategy': 'big_dataset',
'sampling_strategy': sampling_algorithm
},
n_jobs=-1,
logging_level=20)
for fold in range(10):
dataset_dict = eval_fedot_on_fold(dataset_name, fold)
dataset_dict = create_big_dataset(DATASET_NAME, fold)
result_dict = ApiTemplate(api_config=API_CONFIG,
metric_list=metric_names).eval(dataset=dataset_dict,
finetune=finetune)
metric_list=METRIC_NAMES).eval(dataset=dataset_dict,
finetune=False)
metric_by_fold.update({fold: result_dict})
_ = 1
print(metric_by_fold)
Original file line number Diff line number Diff line change
@@ -1,88 +1,32 @@
import numpy as np
from sklearn.utils import shuffle
from examples.example_utils import create_feature_generator_strategy
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
DEFAULT_AUTOML_LEARNING_CONFIG


def load_data(dataset_dir='./fedot_ind/data/Lightning7', dataset_name=None):
    """Load a train/test split stored as ``<name>_TRAIN.txt`` / ``<name>_TEST.txt``.

    Args:
        dataset_dir: Directory that contains the two text files.
        dataset_name: Prefix of the file names. When omitted, the module-level
            ``dataset_name`` is used if one exists (the historical behaviour);
            otherwise the last path component of ``dataset_dir`` is used.

    Returns:
        dict with the keys ``test_data`` and ``train_data``. Each value is a
        ``(features, target)`` tuple: the target is the first column of the
        file and the features are all remaining columns. Only the training
        rows are shuffled (features and target together).
    """
    import os

    if dataset_name is None:
        # This function used to rely on a global that is only assigned when
        # the file runs as a script. Keep honouring that global so existing
        # usage is unchanged, but fall back to the directory name so that an
        # imported call no longer fails with NameError.
        dataset_name = globals().get('dataset_name')
        if dataset_name is None:
            dataset_name = os.path.basename(os.path.normpath(dataset_dir))

    data_train = np.genfromtxt(os.path.join(dataset_dir, f'{dataset_name}_TRAIN.txt'))
    data_test = np.genfromtxt(os.path.join(dataset_dir, f'{dataset_name}_TEST.txt'))

    # Column 0 holds the label, everything after it is the series itself.
    train_features, train_target = data_train[:, 1:], data_train[:, 0]
    test_features, test_target = data_test[:, 1:], data_test[:, 0]

    # Shuffle features and labels in one call so the rows stay aligned.
    train_features, train_target = shuffle(train_features, train_target)

    return dict(test_data=(test_features, test_target),
                train_data=(train_features, train_target))


def create_feature_generator_strategy():
    """Build the default feature-generator and sampling configuration.

    Returns:
        A ``(feature_generator, sampling_dict)`` tuple.

        ``feature_generator`` maps a branch name to a list of
        ``(operation_name, params)`` tuples, in this order:
        ``'stat_generator'``, ``'fourier'``, ``'wavelet'``.

        ``sampling_dict`` has the keys ``'samples'``, ``'channels'`` and
        ``'elements'``, each mapping to ``{'start_idx': 0, 'end_idx': None}``.

    Fresh dict objects are created on every call, so callers may mutate the
    result without affecting later calls.
    """
    stat_params = {'window_size': 0, 'stride': 1, 'add_global_features': True,
                   'channel_independent': False, 'use_sliding_window': False}
    fourier_params = {'low_rank': 5, 'output_format': 'signal',
                      'compute_heuristic_representation': True,
                      'approximation': 'smooth', 'threshold': 0.9,
                      'sampling_rate': 64e3}
    wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7',
                      'compute_heuristic_representation': True}

    # NOTE(review): an earlier revision carried a disabled 'minirocket' branch
    # (('minirocket_extractor', {"num_features": 200})). It was never part of
    # the returned value, so the dead local and commented-out entry are gone.
    feature_generator = {
        'stat_generator': [('quantile_extractor', stat_params)],
        'fourier': [('fourier_basis', fourier_params)],
        'wavelet': [('wavelet_basis', wavelet_params)],
    }

    # Same index window for every axis; one independent dict per axis.
    sampling_dict = {axis: {'start_idx': 0, 'end_idx': None}
                     for axis in ('samples', 'channels', 'elements')}
    return feature_generator, sampling_dict
DEFAULT_AUTOML_LEARNING_CONFIG, DEFAULT_CLF_AUTOML_CONFIG


feature_generator, sampling_dict = create_feature_generator_strategy()

METRIC_NAMES = ('f1', 'accuracy', 'precision', 'roc_auc')
INDUSTRIAL_PARAMS = {'feature_generator': feature_generator,
'data_type': 'tensor',
'learning_strategy': 'ts2tabular',
'sampling_strategy': sampling_dict
}
'sampling_strategy': sampling_dict}

# DEFINE ALL CONFIG FOR API
AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
AUTOML_CONFIG = {'task': 'classification',
'use_automl': True,
'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
'mutation_strategy': 'growth_mutation_strategy'},
'optimisation_agent': 'Industrial'}}
LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
'learning_strategy_params': DEFAULT_AUTOML_LEARNING_CONFIG,
'optimisation_loss': {'quality_loss': 'f1'}}
INDUSTRIAL_CONFIG = {'problem': 'classification',
'strategy': 'tabular',
'strategy_params': INDUSTRIAL_PARAMS
}
API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
'automl_config': AUTOML_CONFIG,
'automl_config': DEFAULT_CLF_AUTOML_CONFIG,
'learning_config': LEARNING_CONFIG,
'compute_config': COMPUTE_CONFIG}
'compute_config': DEFAULT_COMPUTE_CONFIG}

if __name__ == "__main__":
dataset_name = 'Lightning7'
dataset = load_data()
finetune = False
metric_names = ('f1', 'accuracy', 'precision', 'roc_auc')
api_config = dict(problem='classification',
metric='f1',
timeout=5,
pop_size=5,
with_tuning=False,
cv_folds=3,
n_jobs=-1,
logging_level=10)

result_dict = ApiTemplate(api_config=API_CONFIG, metric_list=metric_names).eval(dataset=dataset, finetune=finetune)
result_dict = ApiTemplate(api_config=API_CONFIG,
metric_list=METRIC_NAMES).eval(dataset='Lightning7',
finetune=False)
metrics = result_dict['metrics']
metrics.to_csv('./metrics.csv')
hist = result_dict['industrial_model'].save_optimization_history(return_history=True)
Expand Down
88 changes: 16 additions & 72 deletions examples/automl_example/custom_strategy/multimodal/multimodal.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,7 @@
import numpy as np
from sklearn.utils import shuffle
from examples.example_utils import create_feature_generator_strategy
from fedot_ind.core.architecture.pipelines.abstract_pipeline import ApiTemplate
from fedot_ind.core.repository.config_repository import DEFAULT_COMPUTE_CONFIG, \
DEFAULT_AUTOML_LEARNING_CONFIG


def load_data(dataset_dir='./fedot_ind/data/Lightning7', dataset_name=None):
    """Load a train/test split stored as ``<name>_TRAIN.txt`` / ``<name>_TEST.txt``.

    Args:
        dataset_dir: Directory that contains the two text files.
        dataset_name: Prefix of the file names. When omitted, the module-level
            ``dataset_name`` is used if one exists (the historical behaviour);
            otherwise the last path component of ``dataset_dir`` is used.

    Returns:
        dict with the keys ``test_data`` and ``train_data``. Each value is a
        ``(features, target)`` tuple: the target is the first column of the
        file and the features are all remaining columns. Only the training
        rows are shuffled (features and target together).
    """
    import os

    if dataset_name is None:
        # This function used to rely on a global that is only assigned when
        # the file runs as a script. Keep honouring that global so existing
        # usage is unchanged, but fall back to the directory name so that an
        # imported call no longer fails with NameError.
        dataset_name = globals().get('dataset_name')
        if dataset_name is None:
            dataset_name = os.path.basename(os.path.normpath(dataset_dir))

    data_train = np.genfromtxt(os.path.join(dataset_dir, f'{dataset_name}_TRAIN.txt'))
    data_test = np.genfromtxt(os.path.join(dataset_dir, f'{dataset_name}_TEST.txt'))

    # Column 0 holds the label, everything after it is the series itself.
    train_features, train_target = data_train[:, 1:], data_train[:, 0]
    test_features, test_target = data_test[:, 1:], data_test[:, 0]

    # Shuffle features and labels in one call so the rows stay aligned.
    train_features, train_target = shuffle(train_features, train_target)

    return dict(test_data=(test_features, test_target),
                train_data=(train_features, train_target))


def create_feature_generator_strategy():
    """Build the default feature-generator and sampling configuration.

    Returns:
        A ``(feature_generator, sampling_dict)`` tuple.

        ``feature_generator`` maps a branch name to a list of
        ``(operation_name, params)`` tuples, in this order:
        ``'stat_generator'``, ``'fourier'``, ``'wavelet'``.

        ``sampling_dict`` has the keys ``'samples'``, ``'channels'`` and
        ``'elements'``, each mapping to ``{'start_idx': 0, 'end_idx': None}``.

    Fresh dict objects are created on every call, so callers may mutate the
    result without affecting later calls.
    """
    stat_params = {'window_size': 0, 'stride': 1, 'add_global_features': True,
                   'channel_independent': False, 'use_sliding_window': False}
    fourier_params = {'low_rank': 5, 'output_format': 'signal',
                      'compute_heuristic_representation': True,
                      'approximation': 'smooth', 'threshold': 0.9,
                      'sampling_rate': 64e3}
    wavelet_params = {'n_components': 3, 'wavelet': 'bior3.7',
                      'compute_heuristic_representation': True}

    # NOTE(review): an earlier revision carried a disabled 'minirocket' branch
    # (('minirocket_extractor', {"num_features": 200})). It was never part of
    # the returned value, so the dead local and commented-out entry are gone.
    feature_generator = {
        'stat_generator': [('quantile_extractor', stat_params)],
        'fourier': [('fourier_basis', fourier_params)],
        'wavelet': [('wavelet_basis', wavelet_params)],
    }

    # Same index window for every axis; one independent dict per axis.
    sampling_dict = {axis: {'start_idx': 0, 'end_idx': None}
                     for axis in ('samples', 'channels', 'elements')}
    return feature_generator, sampling_dict

DEFAULT_AUTOML_LEARNING_CONFIG, DEFAULT_CLF_AUTOML_CONFIG

feature_generator, sampling_dict = create_feature_generator_strategy()

Expand All @@ -48,53 +11,34 @@ def create_feature_generator_strategy():
'sampling_strategy': sampling_dict
}

# DEFINE ALL CONFIG FOR API
AUTOML_LEARNING_STRATEGY = DEFAULT_AUTOML_LEARNING_CONFIG
COMPUTE_CONFIG = DEFAULT_COMPUTE_CONFIG
AUTOML_CONFIG = {'task': 'classification',
'use_automl': True,
'optimisation_strategy': {'optimisation_strategy': {'mutation_agent': 'bandit',
'mutation_strategy': 'growth_mutation_strategy'},
'optimisation_agent': 'Industrial'}}
DATASET_NAME = 'Lightning7'
METRIC_NAMES = ('f1', 'accuracy')

LEARNING_CONFIG = {'learning_strategy': 'from_scratch',
'learning_strategy_params': AUTOML_LEARNING_STRATEGY,
'learning_strategy_params': DEFAULT_AUTOML_LEARNING_CONFIG,
'optimisation_loss': {'quality_loss': 'f1'}}
INDUSTRIAL_CONFIG = {'problem': 'classification',
'strategy': 'tabular',
'strategy_params': INDUSTRIAL_PARAMS
}
API_CONFIG = {'industrial_config': INDUSTRIAL_CONFIG,
'automl_config': AUTOML_CONFIG,
'automl_config': DEFAULT_CLF_AUTOML_CONFIG,
'learning_config': LEARNING_CONFIG,
'compute_config': COMPUTE_CONFIG}
'compute_config': DEFAULT_COMPUTE_CONFIG}

if __name__ == "__main__":
dataset_name = 'Lightning7'
dataset = load_data()
finetune = False
metric_names = ('f1', 'accuracy')
multimodal_pipeline = {'recurrence_extractor': {
'window_size': 30,
'stride': 5,
'image_mode': True},
'resnet_model': {
'epochs': 1,
'batch_size': 16,
'model_name': 'ResNet50'}}
multimodal_pipeline = {0: [
# ('recurrence_extractor', {'window_size': 30, 'stride': 5, 'image_mode': True}),
('quantile_extractor', {'window_size': 30, 'stride': 5, 'image_mode': True}),
('resnet_model', {'epochs': 1, 'batch_size': 16, 'model_name': 'ResNet50'})
]}

explain_config = {'method': 'recurrence',
'samples': 1,
'metric': 'mean'}
api_config = dict(problem='classification',
metric='f1',
timeout=0.1,
pop_size=5,
with_tuning=False,
cv_folds=3,
n_jobs=-1,
logging_level=10)

result_dict = ApiTemplate(api_config=API_CONFIG,
metric_list=metric_names).eval(dataset=dataset,
finetune=finetune,
metric_list=METRIC_NAMES).eval(dataset=DATASET_NAME,
finetune=False,
initial_assumption=multimodal_pipeline)
result_dict['industrial_model'].explain(explain_config)
Loading

0 comments on commit 003ae52

Please sign in to comment.