diff --git a/examples/pipeline_example/time_series/ts_classification/advanced_example.py b/examples/pipeline_example/time_series/ts_classification/advanced_example.py index 5dbb37d03..be587ed5a 100644 --- a/examples/pipeline_example/time_series/ts_classification/advanced_example.py +++ b/examples/pipeline_example/time_series/ts_classification/advanced_example.py @@ -1,17 +1,12 @@ -import os -from pathlib import Path from fedot.core.pipelines.pipeline_builder import PipelineBuilder from examples.example_utils import evaluate_metric from examples.example_utils import init_input_data -from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.tools.loader import DataLoader from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels metric_dict = {} -# group = os.listdir(Path(PROJECT_PATH, 'data')) - model_dict = { 'eigen_basis_basic': PipelineBuilder().add_node( 'eigen_basis', @@ -54,16 +49,16 @@ # ] if __name__ == "__main__": + OperationTypesRepository = IndustrialModels().setup_repository() for dataset_name in datasets_bad_f1: train_data, test_data = DataLoader(dataset_name=dataset_name).load_data() input_data = init_input_data(train_data[0], train_data[1]) val_data = init_input_data(test_data[0], test_data[1]) - with IndustrialModels(): - for model in model_dict.keys(): - pipeline = model_dict[model].build() - pipeline.fit(input_data) - features = pipeline.predict(val_data, 'labels').predict - metric = evaluate_metric(target=test_data[1], prediction=features) - metric_dict.update({f'{dataset_name}_{model}': metric}) - print(f'{dataset_name}_{model} - {metric}') + for model in model_dict.keys(): + pipeline = model_dict[model].build() + pipeline.fit(input_data) + features = pipeline.predict(val_data, 'labels').predict + metric = evaluate_metric(target=test_data[1], prediction=features) + metric_dict.update({f'{dataset_name}_{model}': metric}) + print(f'{dataset_name}_{model} - {metric}') print(metric_dict) diff --git a/examples/pipeline_example/time_series/ts_classification/basic_example.py b/examples/pipeline_example/time_series/ts_classification/basic_example.py index b8a99cbcb..38789f2da 100644 --- a/examples/pipeline_example/time_series/ts_classification/basic_example.py +++ b/examples/pipeline_example/time_series/ts_classification/basic_example.py @@ -1,9 +1,14 @@ +import matplotlib +from fedot import Fedot from fedot.core.pipelines.pipeline_builder import PipelineBuilder +from fedot.core.pipelines.verification import common_rules + from examples.example_utils import evaluate_metric from examples.example_utils import init_input_data from fedot_ind.tools.loader import DataLoader from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels +matplotlib.use('TkAgg') model_dict = {'basic_quantile': PipelineBuilder().add_node('quantile_extractor', params={'window_size': 10, 'stride': 5}).add_node('rf'), @@ -27,16 +32,75 @@ 'rf') } metric_dict = {} -train_data, test_data = DataLoader(dataset_name='Ham').load_data() - +dataset = 'Lightning7' +dataset_multi_dim = 'LSST' +train_data, test_data = DataLoader(dataset_name = dataset_multi_dim).load_data() +ts_clf_operations = [ + 'eigen_basis', + 'dimension_reduction', + 'inception_model', + 'rf', + 'minirocket_extractor', + 'normalization', + 'omniscale_model', + 'pca', + 'mlp', + 'quantile_extractor', + 'recurrence_extractor', + 'resample', + 'scaling', + 'signal_extractor', + 'topological_features' +] +# ts_clf_operations = [ +# 'eigen_basis', +# 'dimension_reduction', +# 'inception_model', +# 'rf', +# 'minirocket_extractor', +# 'normalization', +# 'omniscale_model', +# 'pca', +# 'mlp', +# 'quantile_extractor', +# 'recurrence_extractor', +# 'resample', +# 'scaling', +# 'signal_extractor', +# 'topological_features' +# ] if __name__ == "__main__": - with IndustrialModels(): - for model in model_dict.keys(): - pipeline = model_dict[model].build() - input_data = init_input_data(train_data[0], train_data[1]) - val_data = init_input_data(test_data[0], test_data[1]) - pipeline.fit(input_data) - features = pipeline.predict(val_data).predict - metric = evaluate_metric(target=test_data[1], prediction=features) - metric_dict.update({model: metric}) + OperationTypesRepository = IndustrialModels().setup_repository() + #error_pipeline = PipelineBuilder().add_node('scaling').add_node('signal_extractor').add_node('quantile_extractor').add_node('inception_model').build() + error_pipeline = PipelineBuilder().add_node('scaling').add_node('inception_model').build() + # error_pipeline = PipelineBuilder().add_node('scaling').add_node('signal_extractor').add_node( + # 'quantile_extractor').add_node('normalization',branch_idx=1).add_node('signal_extractor',branch_idx=1).add_node( + # 'quantile_extractor',branch_idx=1).join_branches('logit').build() + # # error_pipeline = PipelineBuilder().add_node('signal_extractor').add_node( + # # 'quantile_extractor').add_node('logit').build() + # # add_node('inception_model').add_node( + # # 'scaling', + # # branch_idx=1).join_branches( + # # 'logit').build() + for model in model_dict.keys(): + pipeline = model_dict[model].build() + input_data = init_input_data(train_data[0], train_data[1]) + val_data = init_input_data(test_data[0], test_data[1]) + model = Fedot(problem='classification', + logging_level=20, + n_jobs=1, + metric='f1', + available_operations=ts_clf_operations, + timeout=20 + ) + model.fit(input_data) + model.current_pipeline.show() + features = model.predict(val_data) + metric = evaluate_metric(target=val_data.target, prediction=features) + metric_dict.update({model: metric}) + model.history.save(f"{dataset}classification_history.json") + model.history.show.fitness_box(best_fraction=0.5, dpi=100) + model.history.show.operations_kde(dpi=100) + model.history.show.operations_animated_bar(save_path=f'./{dataset}_history_animated_bars.gif', + show_fitness=True, dpi=100) print(metric_dict) diff --git a/fedot_ind/core/architecture/preprocessing/data_convertor.py b/fedot_ind/core/architecture/preprocessing/data_convertor.py index 0f043d498..33ca2ee25 100644 --- a/fedot_ind/core/architecture/preprocessing/data_convertor.py +++ b/fedot_ind/core/architecture/preprocessing/data_convertor.py @@ -100,6 +100,7 @@ # else: # if return_names: return split_xy(X, y, splits), data_cols # return split_xy(X, y, splits) +from fedot_ind.core.architecture.settings.computational import default_device from fedot_ind.core.architecture.settings.constanst_repository import MULTI_ARRAY, MATRIX @@ -117,21 +118,26 @@ def __len__(self): class CustomDatasetCLF: def __init__(self, ts): - self.x = torch.from_numpy(ts.features).float() + self.x = torch.from_numpy(ts.features).to(default_device()).float() label_1 = max(ts.class_labels) label_0 = min(ts.class_labels) - classes = ts.num_classes - if classes == 2 and label_1 != 1: + self.classes = ts.num_classes + if self.classes == 2 and label_1 != 1: ts.target[ts.target == label_0] = 0 ts.target[ts.target == label_1] = 1 - elif classes == 2 and label_0 != 0: + elif self.classes == 2 and label_0 != 0: ts.target[ts.target == label_0] = 0 ts.target[ts.target == label_1] = 1 - elif classes > 2 and label_0 == 1: + elif self.classes > 2 and label_0 == 1: ts.target = ts.target - 1 - self.y = torch.nn.functional.one_hot(torch.from_numpy(ts.target).long(), - num_classes=classes).squeeze(1) + try: + self.y = torch.nn.functional.one_hot(torch.from_numpy(ts.target).long(), + num_classes=self.classes).to(default_device()).squeeze(1) + except Exception: + self.y = torch.nn.functional.one_hot(torch.from_numpy(ts.target).long()).to(default_device()).squeeze(1) + self.classes = self.y.shape[1] + self.n_samples = ts.features.shape[0] self.supplementary_data = ts.supplementary_data diff --git a/fedot_ind/core/architecture/settings/constanst_repository.py b/fedot_ind/core/architecture/settings/constanst_repository.py index 8591a820a..c0e9b7f89 100644 --- a/fedot_ind/core/architecture/settings/constanst_repository.py +++ b/fedot_ind/core/architecture/settings/constanst_repository.py @@ -6,7 +6,7 @@ import torch from fedot.core.repository.dataset_types import DataTypesEnum from torch import nn, Tensor - +import torch.nn.functional as F from fedot_ind.core.models.nn.network_modules.losses import * from fedot_ind.core.models.quantile.stat_features import * from fedot_ind.core.models.topological.topofeatures import * diff --git a/fedot_ind/core/models/nn/network_impl/base_nn_model.py b/fedot_ind/core/models/nn/network_impl/base_nn_model.py index 7cf1fa3f2..c9600c38a 100644 --- a/fedot_ind/core/models/nn/network_impl/base_nn_model.py +++ b/fedot_ind/core/models/nn/network_impl/base_nn_model.py @@ -38,9 +38,10 @@ class BaseNeuralModel: """ def __init__(self, params: Optional[OperationParameters] = {}): - self.num_classes = params.get('num_classes', 1) + self.num_classes = params.get('num_classes', None) self.epochs = params.get('epochs', 10) self.batch_size = params.get('batch_size', 20) + self.output_mode = params.get('output_mode', 'labels') @convert_inputdata_to_torch_dataset def _create_dataset(self, ts: InputData): @@ -50,9 +51,16 @@ def _init_model(self, ts): self.model = None return + def _evalute_num_of_epochs(self, ts): + min_num_epochs = min(100, round(ts.features.shape[0] * 1.5)) + if self.epochs is None: + self.epochs = min_num_epochs + else: + self.epochs = max(min_num_epochs, self.epochs) + def _convert_predict(self, pred): pred = F.softmax(pred, dim=1) - if self.num_classes == 2: + if self.output_mode == 'labels': pred = torch.argmax(pred, dim=1) y_pred = pred.cpu().detach().numpy() predict = OutputData( @@ -70,6 +78,7 @@ def _prepare_data(self, ts, split_data: bool = True): val_dataset = self._create_dataset(val_data) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=self.batch_size, shuffle=True) + self.num_classes = train_dataset.classes return train_loader, val_loader def _train_loop(self, train_loader, val_loader, loss_fn, optimizer): @@ -88,24 +97,16 @@ def _train_loop(self, train_loader, val_loader, loss_fn, optimizer): training_loss /= len(train_loader.dataset) self.model.eval() - num_correct = 0 - num_examples = 0 for batch in val_loader: inputs, targets = batch output = self.model(inputs) loss = loss_fn(output, targets.float()) valid_loss += loss.data.item() * inputs.size(0) - correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1], - torch.max(targets, dim=1)[1]) - - num_correct += torch.sum(correct).item() - num_examples += correct.shape[0] valid_loss /= len(val_loader.dataset) - print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}, accuracy = {:.2f}'.format(epoch, - training_loss, - valid_loss, - num_correct / num_examples)) + print('Epoch: {}, Training Loss: {:.2f}, Validation Loss: {:.2f}'.format(epoch, + training_loss, + valid_loss)) @convert_to_3d_torch_array def _fit_model(self, ts: InputData, split_data: bool = True): diff --git a/fedot_ind/core/models/nn/network_impl/inception.py b/fedot_ind/core/models/nn/network_impl/inception.py index d6e5a8634..7e8519f6e 100644 --- a/fedot_ind/core/models/nn/network_impl/inception.py +++ b/fedot_ind/core/models/nn/network_impl/inception.py @@ -61,16 +61,18 @@ class InceptionTimeModel(BaseNeuralModel): """ def __init__(self, params: Optional[OperationParameters] = {}): + super().__init__(params) self.num_classes = params.get('num_classes', 1) - self.epochs = params.get('epochs', 100) + self.epochs = params.get('epochs', None) self.batch_size = params.get('batch_size', 32) def _init_model(self, ts): self.model = InceptionTime(input_dim=ts.features.shape[1], output_dim=self.num_classes).to(default_device()) + self._evalute_num_of_epochs(ts) optimizer = optim.Adam(self.model.parameters(), lr=0.001) if ts.num_classes == 2: - loss_fn = CROSS_ENTROPY + loss_fn = CROSS_ENTROPY() else: - loss_fn = MULTI_CLASS_CROSS_ENTROPY + loss_fn = MULTI_CLASS_CROSS_ENTROPY() return loss_fn, optimizer diff --git a/fedot_ind/core/models/nn/network_impl/omni_scale.py b/fedot_ind/core/models/nn/network_impl/omni_scale.py index 8747de2b7..73d8eabf5 100644 --- a/fedot_ind/core/models/nn/network_impl/omni_scale.py +++ b/fedot_ind/core/models/nn/network_impl/omni_scale.py @@ -121,17 +121,19 @@ class OmniScaleModel(BaseNeuralModel): """ def __init__(self, params: Optional[OperationParameters] = {}): + super().__init__(params) self.num_classes = params.get('num_classes', 1) - self.epochs = params.get('epochs', 100) + self.epochs = params.get('epochs', 50) self.batch_size = params.get('batch_size', 32) def _init_model(self, ts): self.model = OmniScaleCNN(input_dim=ts.features.shape[1], output_dim=self.num_classes, seq_len=ts.features.shape[2]).to(default_device()) + self._evalute_num_of_epochs(ts) optimizer = optim.Adam(self.model.parameters(), lr=0.001) if ts.num_classes == 2: - loss_fn = CROSS_ENTROPY + loss_fn = CROSS_ENTROPY() else: - loss_fn = MULTI_CLASS_CROSS_ENTROPY + loss_fn = MULTI_CLASS_CROSS_ENTROPY() return loss_fn, optimizer diff --git a/fedot_ind/core/models/nn/network_impl/resnet.py b/fedot_ind/core/models/nn/network_impl/resnet.py index 0bab30767..b42c86a58 100644 --- a/fedot_ind/core/models/nn/network_impl/resnet.py +++ b/fedot_ind/core/models/nn/network_impl/resnet.py @@ -51,7 +51,7 @@ class ResNetModel(BaseNeuralModel): def __init__(self, params: Optional[OperationParameters] = {}): self.num_classes = params.get('num_classes', 1) - self.epochs = params.get('epochs', 100) + self.epochs = params.get('epochs', 10) self.batch_size = params.get('batch_size', 32) self.model_name = params.get('model_name', 'ResNet18') diff --git a/fedot_ind/core/operation/filtration/feature_filtration.py b/fedot_ind/core/operation/filtration/feature_filtration.py index 0077b8517..201c62b49 100644 --- a/fedot_ind/core/operation/filtration/feature_filtration.py +++ b/fedot_ind/core/operation/filtration/feature_filtration.py @@ -31,11 +31,11 @@ def _transform(self, operation): operation_name = operation.task.task_params else: operation_name = operation.task.task_params.feature_filter - if operation_name in self.method_dict.keys(): + if operation_name is None: + return operation.features + elif operation_name in self.method_dict.keys(): method = self.method_dict[operation_name] return method(operation) - else: - return operation.features def filter_dimension_num(self, data): if len(data.features.shape) < 3: diff --git a/fedot_ind/core/models/automl/fedot_evaluation_strategy.py b/fedot_ind/core/operation/interfaces/fedot_automl_evaluation_strategy.py similarity index 100% rename from fedot_ind/core/models/automl/fedot_evaluation_strategy.py rename to fedot_ind/core/operation/interfaces/fedot_automl_evaluation_strategy.py diff --git a/fedot_ind/core/models/nn/fedot_evaluation_strategy.py b/fedot_ind/core/operation/interfaces/fedot_nn_evaluation_strategy.py similarity index 96% rename from fedot_ind/core/models/nn/fedot_evaluation_strategy.py rename to fedot_ind/core/operation/interfaces/fedot_nn_evaluation_strategy.py index b85c61b5d..f4649c57f 100644 --- a/fedot_ind/core/models/nn/fedot_evaluation_strategy.py +++ b/fedot_ind/core/operation/interfaces/fedot_nn_evaluation_strategy.py @@ -30,6 +30,7 @@ def _convert_to_operation(self, operation_type: str): def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): self.operation_impl = self._convert_to_operation(operation_type) + self.output_mode = params.get('output_mode', 'labels') super().__init__(operation_type, params) def fit(self, train_data: InputData): @@ -60,4 +61,4 @@ def _convert_to_operation(self, operation_type: str): def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): self.operation_impl = self._convert_to_operation(operation_type) - super().__init__(operation_type, params) \ No newline at end of file + super().__init__(operation_type, params) diff --git a/fedot_ind/core/operation/interfaces/industrial_base_strategy.py b/fedot_ind/core/operation/interfaces/industrial_base_strategy.py new file mode 100644 index 000000000..0e819ca2b --- /dev/null +++ b/fedot_ind/core/operation/interfaces/industrial_base_strategy.py @@ -0,0 +1,83 @@ +from typing import Optional, Union + +from fedot.core.data.data import InputData +from fedot.core.operations.evaluation.common_preprocessing import FedotPreprocessingStrategy +from fedot.core.operations.operation import Operation +from fedot.core.operations.operation_parameters import OperationParameters + +from fedot_ind.core.models.nn.network_impl.mini_rocket import MiniRocketExtractor +from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor +from fedot_ind.core.models.signal.signal_extractor import SignalExtractor +from fedot_ind.core.models.quantile.quantile_extractor import QuantileExtractor +from fedot_ind.core.models.topological.topological_extractor import TopologicalExtractor +from fedot_ind.core.operation.dummy.dummy_operation import DummyOperation +from fedot_ind.core.operation.filtration.feature_filtration import FeatureFilter + +from fedot_ind.core.operation.transformation.basis.fourier import FourierBasisImplementation +from fedot_ind.core.operation.transformation.basis.wavelet import WaveletBasisImplementation +from fedot_ind.core.operation.transformation.basis.eigen_basis import EigenBasisImplementation + +from fedot_ind.core.repository.IndustrialOperationParameters import IndustrialOperationParameters + + +class IndustrialBaseStrategy(Operation): + """ + Args: + operation_type: ``str`` of the operation defined in operation or data operation repositories + + .. details:: possible operations: + + - ``data_driven_basic``-> EigenBasisImplementation, + - ``topological_features``-> TopologicalExtractor, + + + params: hyperparameters to fit the operation with + + """ + + __operations_by_types = { + 'eigen_basis': EigenBasisImplementation, + 'wavelet_basis': WaveletBasisImplementation, + 'fourier_basis': FourierBasisImplementation, + 'topological_extractor': TopologicalExtractor, + 'quantile_extractor': QuantileExtractor, + 'signal_extractor': SignalExtractor, + 'recurrence_extractor': RecurrenceExtractor, + 'minirocket_extractor': MiniRocketExtractor, + 'cat_features': DummyOperation, + 'dimension_reduction': FeatureFilter + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + params = IndustrialOperationParameters().from_params(operation_type, params) if params \ + else IndustrialOperationParameters().from_operation_type(operation_type) + super().__init__(operation_type, params) + + def predict(self, fitted_operation, data: InputData, params: Optional[Union[OperationParameters, dict]] = None, + output_mode: str = 'labels'): + """This method is used for defining and running of the evaluation strategy + to predict with the data provided + + Args: + fitted_operation: trained operation object + data: data used for prediction + params: hyperparameters for operation + output_mode: string with information about output of operation, + for example, is the operation predict probabilities or class labels + """ + return self._predict(fitted_operation, data, params, output_mode, is_fit_stage=False) + + def predict_for_fit(self, fitted_operation, data: InputData, params: Optional[OperationParameters] = None, + output_mode: str = 'labels'): + """This method is used for defining and running of the evaluation strategy + to predict with the data provided during fit stage + + Args: + fitted_operation: trained operation object + data: data used for prediction + params: hyperparameters for operation + output_mode: string with information about output of operation, + for example, is the operation predict probabilities or class labels + """ + return self._predict(fitted_operation, data, params, output_mode, is_fit_stage=True) \ No newline at end of file diff --git a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py index 894caf66c..d70518d4d 100644 --- a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py +++ b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py @@ -1,7 +1,16 @@ +import warnings from typing import Optional +from fedot.core.data.data import InputData, OutputData from fedot.core.operations.evaluation.common_preprocessing import FedotPreprocessingStrategy +from fedot.core.operations.evaluation.operation_implementations.data_operations.categorical_encoders import \ + OneHotEncodingImplementation, LabelEncodingImplementation +from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \ + * +from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.topological_extractor import \ + TopologicalFeaturesImplementation from fedot.core.operations.operation_parameters import OperationParameters +from fedot.utilities.random import ImplementationRandomStateHandler from fedot_ind.core.models.nn.network_impl.mini_rocket import MiniRocketExtractor from fedot_ind.core.models.recurrence.reccurence_extractor import RecurrenceExtractor @@ -43,7 +52,7 @@ class IndustrialPreprocessingStrategy(FedotPreprocessingStrategy): 'recurrence_extractor': RecurrenceExtractor, 'minirocket_extractor': MiniRocketExtractor, 'cat_features': DummyOperation, - 'feature_filter_model': FeatureFilter + 'dimension_reduction': FeatureFilter } def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): @@ -57,3 +66,116 @@ def _convert_to_operation(self, operation_type: str): return self.__operations_by_types[operation_type] else: raise ValueError(f'Impossible to obtain custom preprocessing strategy for {operation_type}') + + +class IndustrialCustomPreprocessingStrategy(FedotPreprocessingStrategy): + """ + Args: + operation_type: ``str`` of the operation defined in operation or data operation repositories + + .. details:: possible operations: + + - ``scaling``-> ScalingImplementation, + - ``normalization``-> NormalizationImplementation, + - ``simple_imputation``-> ImputationImplementation, + - ``pca``-> PCAImplementation, + - ``kernel_pca``-> KernelPCAImplementation, + - ``poly_features``-> PolyFeaturesImplementation, + - ``one_hot_encoding``-> OneHotEncodingImplementation, + - ``label_encoding``-> LabelEncodingImplementation, + - ``fast_ica``-> FastICAImplementation + + params: hyperparameters to fit the operation with + + """ + + _operations_by_types = { + 'scaling': ScalingImplementation, + 'normalization': NormalizationImplementation, + 'simple_imputation': ImputationImplementation, + 'pca': PCAImplementation, + 'kernel_pca': KernelPCAImplementation, + 'poly_features': PolyFeaturesImplementation, + 'one_hot_encoding': OneHotEncodingImplementation, + 'label_encoding': LabelEncodingImplementation, + 'fast_ica': FastICAImplementation, + 'topological_features': TopologicalFeaturesImplementation + } + + def __init__(self, operation_type: str, params: Optional[OperationParameters] = None): + self.operation_impl = self._convert_to_operation(operation_type) + super().__init__(operation_type, params) + + def fit(self, train_data: InputData): + """This method is used for operation training with the data provided + + Args: + train_data: data used for operation training + + Returns: + trained Sklearn operation + """ + + warnings.filterwarnings("ignore", category=RuntimeWarning) + operation_implementation = self.operation_impl(self.params_for_fit) + with ImplementationRandomStateHandler(implementation=operation_implementation): + if len(train_data.features.shape) > 2: + input_data = [InputData(idx=train_data.idx, + features=features, + target=train_data.target, + task=train_data.task, + data_type=train_data.data_type, + supplementary_data=train_data.supplementary_data) for features in + train_data.features.swapaxes(1, 0)] + fitted_operation = list(map(operation_implementation.fit, input_data)) + operation_implementation = fitted_operation + else: + operation_implementation.fit(train_data) + return operation_implementation + + def predict(self, trained_operation, predict_data: InputData) -> OutputData: + """Transform method for preprocessing task + + Args: + trained_operation: model object + predict_data: data used for prediction + + Returns: + prediction + """ + if type(trained_operation) is list: + prediction = self.__predict_for_ndim(predict_data, trained_operation) + else: + prediction = trained_operation.transform(predict_data) + # Convert prediction to output (if it is required) + converted = self._convert_to_output(prediction, predict_data) + return converted + + def __predict_for_ndim(self, predict_data, trained_operation): + test_data = [InputData(idx=predict_data.idx, + features=features, + target=predict_data.target, + task=predict_data.task, + data_type=predict_data.data_type, + supplementary_data=predict_data.supplementary_data) for features in + predict_data.features.swapaxes(1, 0)] + prediction = list(operation.transform(data.features) for operation, data in zip(trained_operation, test_data)) + prediction = np.stack(prediction).swapaxes(0, 1) + return prediction + + def predict_for_fit(self, trained_operation, predict_data: InputData) -> OutputData: + """ + Transform method for preprocessing task for fit stage + + Args: + trained_operation: model object + predict_data: data used for prediction + Returns: + OutputData: + """ + if type(trained_operation) is list: + prediction = self.__predict_for_ndim(predict_data, trained_operation) + else: + prediction = trained_operation.transform(predict_data) + converted = self._convert_to_output(prediction, predict_data) + return converted diff --git a/fedot_ind/core/repository/data/default_operation_params.json b/fedot_ind/core/repository/data/default_operation_params.json index 923708701..dea7e4130 100644 --- a/fedot_ind/core/repository/data/default_operation_params.json +++ b/fedot_ind/core/repository/data/default_operation_params.json @@ -219,7 +219,7 @@ "epochs": 100, "batch_size": 32 }, - "feature_filter_model": { + "dimension_reduction": { "grouping_level": 0.4, "fourier_approximation": "exact", "explained_dispersion": 0.9 diff --git a/fedot_ind/core/repository/data/industrial_data_operation_repository.json b/fedot_ind/core/repository/data/industrial_data_operation_repository.json index e6595a547..0ee81498c 100644 --- a/fedot_ind/core/repository/data/industrial_data_operation_repository.json +++ b/fedot_ind/core/repository/data/industrial_data_operation_repository.json @@ -24,13 +24,13 @@ ], "forbidden_node_types": "[]", "strategies": [ - "fedot.core.operations.evaluation.common_preprocessing", - "FedotPreprocessingStrategy" + "fedot_ind.core.operation.interfaces.industrial_preprocessing_strategy", + "IndustrialCustomPreprocessingStrategy" ], "tags": [ "sklearn" ], - "description": "Implementations of the preprocessing from scikit-learn framework" + "description": "Implementations of the preprocessing from scikit-learn framework upg" }, "sklearn_categorical": { "tasks": "[TaskTypesEnum.classification, TaskTypesEnum.regression, TaskTypesEnum.clustering]", @@ -214,7 +214,7 @@ "fast_train" ], "tags": [ - "extractor" + "basis" ] }, "recurrence_extractor": { @@ -243,7 +243,7 @@ "extractor" ] }, - "feature_filter_model": { + "dimension_reduction": { "meta": "industrial_preprocessing_classification", "presets": [ "fast_train" diff --git a/fedot_ind/core/repository/data/industrial_model_repository.json b/fedot_ind/core/repository/data/industrial_model_repository.json index 052f018cf..6fefc3992 100644 --- a/fedot_ind/core/repository/data/industrial_model_repository.json +++ b/fedot_ind/core/repository/data/industrial_model_repository.json @@ -142,7 +142,7 @@ ], "forbidden_node_types": "[]", "strategies": [ - "fedot_ind.core.models.automl.fedot_evaluation_strategy", + "fedot_ind.core.operation.interfaces.fedot_automl_evaluation_strategy", "FedotAutoMLClassificationStrategy" ], "tags": [ @@ -158,7 +158,7 @@ ], "forbidden_node_types": "[]", "strategies": [ - "fedot_ind.core.models.nn.fedot_evaluation_strategy", + "fedot_ind.core.operation.interfaces.fedot_nn_evaluation_strategy", "FedotNNClassificationStrategy" ], "tags": [ @@ -174,7 +174,7 @@ ], "forbidden_node_types": "[]", "strategies": [ - "fedot_ind.core.models.nn.fedot_evaluation_strategy", + "fedot_ind.core.operation.interfaces.fedot_nn_evaluation_strategy", "FedotNNTimeSeriesStrategy" ], "tags": ["time_series" @@ -190,7 +190,7 @@ ], "forbidden_node_types": "[]", "strategies": [ - "fedot_ind.core.models.automl.fedot_evaluation_strategy", + "fedot_ind.core.operation.interfaces.fedot_automl_evaluation_strategy", "FedotAutoMLRegressionStrategy" ], "tags": [ @@ -210,7 +210,7 @@ "fedot_cls": { "meta": "fedot_automl_classification", "presets": [ - "best_quality" + "auto" ], "tags": [ "automl" @@ -219,7 +219,7 @@ "fedot_regr": { "meta": "fedot_automl_regression", "presets": [ - "best_quality" + "auto" ], "tags": [ "automl" @@ -231,7 +231,6 @@ "best_quality" ], "tags": [ - "automl" ] }, "omniscale_model": { @@ -240,7 +239,6 @@ "best_quality" ], "tags": [ - "automl" ] }, "tst_model": { @@ -249,7 +247,6 @@ "best_quality" ], "tags": [ - "automl" ] }, "resnet_model": { diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py index 8fdaa7a5e..2e0897bcf 100644 --- a/fedot_ind/core/repository/initializer_industrial_models.py +++ b/fedot_ind/core/repository/initializer_industrial_models.py @@ -1,19 +1,28 @@ import pathlib import random +from enum import Enum from typing import Sequence +from fedot.api.api_utils.api_composer import ApiComposer from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace +from fedot.core.pipelines.verification import class_rules, common_rules from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operations_for_task from fedot.core.repository.tasks import Task, TaskTypesEnum from golem.core.dag.verification_rules import ERROR_PREFIX +from golem.core.optimisers.genetic.operators.base_mutations import MutationStrengthEnum from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum from fedot_ind.api.utils.path_lib import PROJECT_PATH from fedot_ind.core.tuning.search_space import get_industrial_search_space +class MutationStrengthEnumIndustrial(Enum): + weak = 1.0 + mean = 3.0 + strong = 5.0 + def add_preprocessing(pipeline: Pipeline, **kwargs) -> Pipeline: task = Task(TaskTypesEnum.classification) @@ -36,6 +45,7 @@ def add_preprocessing(pipeline: Pipeline, **kwargs) -> Pipeline: def _get_default_industrial_mutations(task_type: TaskTypesEnum) -> Sequence[MutationTypesEnum]: mutations = [parameter_change_mutation, MutationTypesEnum.single_change, + add_preprocessing ] return mutations @@ -64,7 +74,7 @@ def has_no_data_flow_conflicts_in_industrial_pipeline(pipeline: Pipeline): extractor = get_operations_for_task(task=task, mode='data_operation', tags=["extractor"]) other = get_operations_for_task(task=task, forbidden_tags=["basis", "extractor"]) - for node in pipeline.nodes: + for idx, node in enumerate(pipeline.nodes): # Operation name in the current node current_operation = node.operation.operation_type parent_nodes = node.nodes_from @@ -75,19 +85,23 @@ def has_no_data_flow_conflicts_in_industrial_pipeline(pipeline: Pipeline): # There are several parents for current node or at least 1 for parent in parent_nodes: parent_operation = parent.operation.operation_type - if current_operation in extractor: - if parent_operation not in basis_models: - raise ValueError( - f'{ERROR_PREFIX} Pipeline has incorrect subgraph with wrong parent nodes combination') + if current_operation in basis_models and pipeline.nodes[ + idx + 1].operation.operation_type not in extractor: + raise ValueError( + f'{ERROR_PREFIX} Pipeline has incorrect subgraph with wrong parent nodes combination. ' + f'Basis output should contain feature transformation') elif current_operation in other: if parent_operation in basis_models: raise ValueError( - f'{ERROR_PREFIX} Pipeline has incorrect subgraph with wrong parent nodes combination') + f'{ERROR_PREFIX} Pipeline has incorrect subgraph with wrong parent nodes combination.' + f'Basis model should by applied on initial features, not models output.') + else: - # Only basis models can be primary - if current_operation not in basis_models: - raise ValueError( - f'{ERROR_PREFIX} Pipeline has incorrect subgraph with wrong parent nodes combination') + continue + # # Only basis models can be primary + # if current_operation not in basis_models: + # raise ValueError( + # f'{ERROR_PREFIX} Pipeline has incorrect subgraph with wrong parent nodes combination') return True @@ -122,6 +136,10 @@ def setup_repository(self): OperationTypesRepository.assign_repo('model', self.industrial_model_path) setattr(PipelineSearchSpace, "get_parameters_dict", get_industrial_search_space) + setattr(ApiComposer, "_get_default_mutations", _get_default_industrial_mutations) + class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) + MutationStrengthEnum = MutationStrengthEnumIndustrial + # common_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline) return OperationTypesRepository def __enter__(self): @@ -143,7 +161,6 @@ def __enter__(self): setattr(PipelineSearchSpace, "get_parameters_dict", get_industrial_search_space) - def __exit__(self, exc_type, exc_val, exc_tb): """ Switching to fedot models. @@ -160,5 +177,3 @@ def __exit__(self, exc_type, exc_val, exc_tb): 'initialized_repo': None, 'default_tags': []}}) OperationTypesRepository.assign_repo('model', self.base_model_path) - - # setattr(ApiComposer, "_get_default_mutations", _get_default_mutations) diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index e7042f239..b5cca2c54 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -29,10 +29,16 @@ {'wavelet': {'hyperopt-dist': hp.choice, 'sampling-scope': [['mexh', 'shan', 'morl', 'cmor', 'fbsp', 'db5', 'sym5']]}}, 'minirocket_extractor': - {'num_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(500, 10000, 500)]]}}, + {'num_features': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}}, 'patch_tst_model': {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(10, 100, 10)]]}, 'batch_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(8, 64, 6)]]}}, + 'omniscale_model': + {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(30, 300, 30)]]}, + 'batch_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(8, 64, 6)]]}}, + 'inception_model': + {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(30, 300, 30)]]}, + 'batch_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(8, 64, 6)]]}}, 'ssa_forecaster': {'window_size_method': {'hyperopt-dist': hp.choice, 'sampling-scope': [['hac', 'dff']]}} }