Skip to content

Commit

Permalink
add target encoding to API, fixes with RAF algo
Browse files Browse the repository at this point in the history
  • Loading branch information
v1docq committed Feb 8, 2024
1 parent c766680 commit 97f6a2c
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 18 deletions.
18 changes: 13 additions & 5 deletions fedot_ind/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def __init__(self, **kwargs):
self.preprocessing = kwargs.get('industrial_preprocessing', False)
self.backend_method = kwargs.get('backend', 'cpu')
self.RAF_workers = kwargs.get('RAF_workers', None)
self.path_to_composition_results = kwargs.get('history_dir', None)
prefix = './composition_results' if self.path_to_composition_results is None \
else self.path_to_composition_results
Path(prefix).mkdir(parents=True, exist_ok=True)

if self.output_folder is None:
self.output_folder = default_path_to_save_results
Expand All @@ -93,9 +97,11 @@ def __init__(self, **kwargs):
self.predicted_probs = None
self.predict_data = None
self.config_dict = kwargs
self.config_dict['history_dir'] = prefix
self.config_dict['available_operations'] = kwargs.get('available_operations',
default_industrial_availiable_operation(
self.config_dict['problem']))

self.config_dict['optimizer'] = kwargs.get(
'optimizer', IndustrialEvoOptimizer)
self.config_dict['initial_assumption'] = kwargs.get('initial_assumption',
Expand Down Expand Up @@ -176,8 +182,10 @@ def fit(self,
"""
self.train_data = deepcopy(
input_data) # we do not want to make inplace changes
self.train_data = DataCheck(
input_data=self.train_data, task=self.config_dict['problem']).check_input_data()
input_preproc = DataCheck(
input_data=self.train_data, task=self.config_dict['problem'])
self.train_data = input_preproc.check_input_data()
self.target_encoder = input_preproc.get_target_encoder()
self.solver = self.__init_solver()
if self.preprocessing:
self._preprocessing_strategy(self.train_data)
Expand Down Expand Up @@ -205,8 +213,8 @@ def predict(self,
elif isinstance(self.solver, list):
predict = self._predict_raf_ensemble()
else:
predict = self.solver.predict(self.predict_data, 'labels').predict
self.predicted_labels = predict
predict = self.solver.predict(self.predict_data, 'labels')
self.predicted_labels = predict if self.target_encoder is None else self.target_encoder.transform(predict)
return self.predicted_labels

def predict_proba(self,
Expand All @@ -229,7 +237,7 @@ def predict_proba(self,
elif isinstance(self.solver, list):
return self.predicted_probs if self.predicted_probs is not None else self._predict_raf_ensemble()
else:
predict = self.solver.predict(self.predict_data, 'probs').predict
predict = self.solver.predict(self.predict_data, 'probs')
self.predicted_probs = predict
return self.predicted_probs

Expand Down
12 changes: 9 additions & 3 deletions fedot_ind/api/utils/checkers_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(self,
self.input_data = input_data
self.task = task
self.task_dict = FEDOT_TASK
self.label_encoder = None

def __check_features_and_target(self, X, y):
multi_features, X = check_multivariate_data(X)
Expand Down Expand Up @@ -72,9 +73,11 @@ def _init_input_data(self) -> None:
features, is_multivariate_data, target = self.__check_features_and_target(
X, y)

if y is not None and type(y[0]) is np.str_ and self.task == 'classification':
label_encoder = LabelEncoder()
target = label_encoder.fit_transform(target)
if self.label_encoder is None and type(y[0]) is np.str_ and self.task == 'classification':
self.label_encoder = LabelEncoder()
target = self.label_encoder.fit_transform(target)
else:
self.label_encoder = self.label_encoder

if is_multivariate_data:
self.input_data = InputData(idx=np.arange(len(X)),
Expand Down Expand Up @@ -146,3 +149,6 @@ def check_input_data(self) -> InputData:
self._check_input_data_features()
self._check_input_data_target()
return self.input_data

def get_target_encoder(self):
    """Expose the target label encoder held by this checker.

    Returns:
        Whatever is currently stored in ``self.label_encoder``.
    """
    encoder = self.label_encoder
    return encoder
14 changes: 12 additions & 2 deletions fedot_ind/core/ensemble/random_automl_forest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from copy import deepcopy

from fedot.core.data.data import InputData
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.repository.dataset_types import DataTypesEnum
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold, train_test_split
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot_ind.core.repository.constanst_repository import FEDOT_ATOMIZE_OPERATION, FEDOT_HEAD_ENSEMBLE, FEDOT_TASK
from fedot_ind.core.repository.constanst_repository import FEDOT_ATOMIZE_OPERATION, FEDOT_HEAD_ENSEMBLE, FEDOT_TASK, \
FEDOT_ENSEMBLE_ASSUMPTIONS
from fedot_ind.core.repository.model_repository import SKLEARN_CLF_MODELS, SKLEARN_REG_MODELS


class RAFensembler:
Expand Down Expand Up @@ -81,7 +85,13 @@ def _raf_ensemble(self, features, target, n_splits):
branch_idx=i)
data_dict.update({f'data_source_img/{i}': train_fold})
train_multimodal = MultiModalData(data_dict)
head_automl_params = deepcopy(self.atomized_automl_params)
head_automl_params['available_operations'] = [operation for operation
in head_automl_params['available_operations']
if operation in list(SKLEARN_CLF_MODELS.keys())
or operation in list(SKLEARN_REG_MODELS.keys())]
head_automl_params['initial_assumption'] = FEDOT_ENSEMBLE_ASSUMPTIONS[self.atomized_automl_params['problem']].build()
raf_ensemble = raf_ensemble.join_branches(self.head,
params=self.atomized_automl_params).build()
params=head_automl_params).build()
raf_ensemble.fit(input_data=train_multimodal)
return raf_ensemble
8 changes: 7 additions & 1 deletion fedot_ind/core/repository/constanst_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class ComputationalConstant(Enum):
}
BATCH_SIZE_FOR_FEDOT_WORKER = 1000
FEDOT_WORKER_NUM = 5
FEDOT_WORKER_TIMEOUT_PARTITION = 2
FEDOT_WORKER_TIMEOUT_PARTITION = 4
PATIENCE_FOR_EARLY_STOP = 15


Expand Down Expand Up @@ -186,6 +186,11 @@ class FedotOperationConstant(Enum):
'ts_forecasting': PipelineBuilder().add_node('lagged').add_node('ridge')
}

FEDOT_ENSEMBLE_ASSUMPTIONS = {
'classification': PipelineBuilder().add_node('logit'),
'regression': PipelineBuilder().add_node('treg')
}


class ModelCompressionConstant(Enum):
ENERGY_THR = [0.9, 0.95, 0.99, 0.999]
Expand Down Expand Up @@ -405,6 +410,7 @@ class BenchmarkDatasets(Enum):
FEDOT_TUNING_METRICS = FedotOperationConstant.FEDOT_TUNING_METRICS.value
FEDOT_ASSUMPTIONS = FedotOperationConstant.FEDOT_ASSUMPTIONS.value
FEDOT_API_PARAMS = FedotOperationConstant.FEDOT_API_PARAMS.value
FEDOT_ENSEMBLE_ASSUMPTIONS = FedotOperationConstant.FEDOT_ENSEMBLE_ASSUMPTIONS.value

CPU_NUMBERS = ComputationalConstant.CPU_NUMBERS.value
BATCH_SIZE_FOR_FEDOT_WORKER = ComputationalConstant.BATCH_SIZE_FOR_FEDOT_WORKER.value
Expand Down
34 changes: 27 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,36 @@ MKLpy==0.6
PyMonad==2.4.0
PyWavelets==1.4.1

fastcore
fastai
distributed
datasetsforecast
fastcore~=1.5.29
fastai~=2.7.14
distributed~=2024.1.1
datasetsforecast~=0.0.8

tensorly==0.8.1
torch==2.0.0
torch~=2.2.0
torchmetrics==0.11.4
torchvision==0.15.1
torchvision~=0.17.0
tensorboard>=2.12.0
statsforecast==1.5.0

chardet
chardet~=5.2.0
sphinx~=7.2.6
numpy~=1.24.4
pytest~=8.0.0
matplotlib~=3.8.2
pandas~=1.5.3
fedot~=0.7.3
scipy~=1.12.0
typing~=3.7.4.3
scikit-learn~=1.2.2
PyYAML~=6.0.1
setuptools~=60.2.0
pillow~=10.2.0
tqdm~=4.65.2
seaborn~=0.13.2
joblib~=1.3.2
hyperopt~=0.2.7
ripser~=0.6.4
statsmodels~=0.14.1
xgboost~=2.0.3
sktime~=0.16.1

0 comments on commit 97f6a2c

Please sign in to comment.