fix in f1 metric

aimclub · Dec 25, 2023 · c8e465b · c8e465b
1 parent ea8d970
commit c8e465b
Show file tree

Hide file tree

Showing 6 changed files with 130 additions and 47 deletions.
diff --git a/examples/benchmark/time_series_uni_clf_benchmark.py b/examples/benchmark/time_series_uni_clf_benchmark.py
@@ -37,19 +37,19 @@
 if __name__ == "__main__":
     benchmark = BenchmarkTSC(experiment_setup=experiment_setup,
                              custom_datasets=[
-                                 # "Beef",
-                                 # "BeetleFly",
-                                 # "BirdChicken",
-                                 # "BME",
-                                 # "Car",
-                                 # "CBF",
-                                 # "Chinatown",
-                                 # "ChlorineConcentration",
-                                 # "CinCECGTorso",
-                                 # "Coffee",
-                                 # "Computers",
-                                 # "CricketX",
-                                 # "CricketY",
+                                 "Beef",
+                                 "BeetleFly",
+                                 "BirdChicken",
+                                 "BME",
+                                 "Car",
+                                 "CBF",
+                                 "Chinatown",
+                                 "ChlorineConcentration",
+                                 "CinCECGTorso",
+                                 "Coffee",
+                                 "Computers",
+                                 "CricketX",
+                                 "CricketY",
                                  "CricketZ",
                                  "Crop",
                                  "DiatomSizeReduction",
@@ -149,4 +149,6 @@
                                  "Yoga",
                              ],
                              use_small_datasets=True)
-    benchmark.finetune()
+    benchmark.create_report()
+    #benchmark.finetune()
+    #benchmark.run()
diff --git a/fedot_ind/api/main.py b/fedot_ind/api/main.py
@@ -176,10 +176,8 @@ def finetune(self, train_data, tuning_params) -> np.ndarray:
 
         """
         train_data = DataCheck(input_data=train_data, task=self.config_dict['problem']).check_input_data()
-        if train_data.num_classes > 2:
-            metric = ClassificationMetricsEnum.f1
-        else:
-            metric = ClassificationMetricsEnum.accuracy
+
+        metric = ClassificationMetricsEnum.accuracy
         tuning_method = partial(SequentialTuner, inverse_node_order=True)
         tuning_method = SimultaneousTuner
         pipeline_tuner = TunerBuilder(train_data.task) \

diff --git a/fedot_ind/core/ensemble/random_automl_forest.py b/fedot_ind/core/ensemble/random_automl_forest.py
@@ -0,0 +1,59 @@
+from fedot.core.pipelines.pipeline_builder import PipelineBuilder
+
+
+class RAFEnsembler:
+    """Build an ensemble of atomized AutoML nodes.
+
+    The constructor resolves the atomized AutoML operation name from
+    ``composing_params['problem']`` and the private builder method from
+    ``ensemble_type``. ``fit`` and ``predict`` are still placeholders.
+
+    :param composing_params: mapping with a ``'problem'`` key equal to
+        ``'regression'`` or ``'classification'``; it is stored as-is and passed
+        as ``params`` to every node added by the random-AutoML-forest builder
+    :param ensemble_type: ``'random_automl_forest'`` or ``'two_stage_kernel'``
+    :raises KeyError: if the problem or the ensemble type is not a known key
+    """
+
+    def __init__(self, composing_params, ensemble_type: str = 'random_automl_forest'):
+        problem_dict = {'regression': 'fedot_regr',
+                        'classification': 'fedot_cls'}
+        # Double-underscore attributes are name-mangled per class, so every
+        # method referenced here must exist under exactly this spelling;
+        # otherwise construction fails with AttributeError.
+        ensemble_dict = {'random_automl_forest': self.__raf_ensemble,
+                         'two_stage_kernel': self.__two_stage_kernel
+                         }
+        self.atomized_automl = problem_dict[composing_params['problem']]
+        self.ensemble_method = ensemble_dict[ensemble_type]
+        self.atomized_automl_params = composing_params
+
+    def fit(self, train_data):
+        """Placeholder: not implemented yet, returns ``None``."""
+        pass
+
+    def predict(self, test_data):
+        """Placeholder: not implemented yet, returns ``None``."""
+        pass
+
+    def __raf_ensemble(self, chunks):
+        """Return a builder with ``chunks`` AutoML branches joined by ``'logit'``.
+
+        :param chunks: number of branches; one atomized AutoML node is added
+            for each branch index in ``range(chunks)``
+        :return: the ``PipelineBuilder`` after ``join_branches('logit')``
+        """
+        raf_ensemble = PipelineBuilder()
+        for i in range(chunks):
+            raf_ensemble.add_node(self.atomized_automl, params=self.atomized_automl_params, branch_idx=i)
+        raf_ensemble.join_branches('logit')
+        return raf_ensemble
+
+    def __two_stage_kernel(self, chunks):
+        """Placeholder for the ``'two_stage_kernel'`` ensemble.
+
+        It exists so that the dispatch table in ``__init__`` can be built.
+
+        :raises NotImplementedError: always, until the method is implemented
+        """
+        raise NotImplementedError('two_stage_kernel ensemble is not implemented yet')
diff --git a/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py b/fedot_ind/core/operation/interfaces/industrial_preprocessing_strategy.py
@@ -1,16 +1,14 @@
 import warnings
 from copy import deepcopy
 
-import numpy as np
 from fedot.core.data.data import InputData, OutputData
-from fedot.core.operations.evaluation.common_preprocessing import FedotPreprocessingStrategy
 from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy, \
     convert_to_multivariate_model, is_multi_output_task
 from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \
     *
 from fedot.core.operations.operation_parameters import OperationParameters
 from fedot.core.repository.dataset_types import DataTypesEnum
-from fedot.core.repository.operation_types_repository import OperationTypesRepository
+from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operation_type_from_id
 from fedot.utilities.random import ImplementationRandomStateHandler
 
 from fedot_ind.core.architecture.preprocessing.data_convertor import NumpyConverter
@@ -92,7 +90,7 @@ def _convert_to_output(self, prediction,
                                    target=predict_data.target,
                                    data_type=output_data_type,
                                    supplementary_data=predict_data.supplementary_data)
-
+        converted.predict = NumpyConverter(data=converted.predict).convert_to_torch_format()
         return converted
 
     def _sklearn_compatible_prediction(self, trained_operation, predict_data, output_mode: str = 'probs'):
@@ -290,7 +288,66 @@ def predict(self, trained_operation, predict_data: InputData,
         return converted
 
 
-class IndustrialPreprocessingStrategy(FedotPreprocessingStrategy):
+class IndustrialCustomPreprocessingStrategy:
+    """Strategy that runs a FEDOT preprocessing operation via the multi-dimensional dispatcher.
+
+    The implementation is looked up in ``_operations_by_types`` by its string
+    id and wrapped in a ``MultiDimPreprocessingStrategy``; ``fit``, ``predict``
+    and ``predict_for_fit`` forward to that dispatcher.
+    """
+    _operations_by_types = FEDOT_PREPROC_MODEL
+
+    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
+        """
+        :param operation_type: key of the operation in ``_operations_by_types``
+        :param params: parameters kept for fitting; a fresh ``OperationParameters``
+            is created when a falsy value is passed
+        """
+        self.operation_impl = self._convert_to_operation(operation_type)
+        self.multi_dim_dispatcher = MultiDimPreprocessingStrategy(self.operation_impl, operation_type)
+        self.operation_id = operation_type
+        self.output_mode = False
+        self.params_for_fit = params if params else OperationParameters()
+
+    @property
+    def operation_type(self):
+        """Result of ``get_operation_type_from_id`` for the stored ``operation_id``."""
+        return get_operation_type_from_id(self.operation_id)
+
+    @property
+    def implementation_info(self) -> str:
+        """String form of the implementation registered for ``operation_type``."""
+        implementation = self._convert_to_operation(self.operation_type)
+        return str(implementation)
+
+    def _convert_to_operation(self, operation_type: str):
+        """Return the implementation registered under ``operation_type``.
+
+        :raises ValueError: if the key is absent from ``_operations_by_types``
+        """
+        if operation_type not in self._operations_by_types:
+            raise ValueError(f'Impossible to obtain {self.__class__} strategy for {operation_type}')
+        return self._operations_by_types[operation_type]
+
+    def fit(self, train_data: InputData):
+        """Fit through the dispatcher in ``'custom_fit'`` mode."""
+        dispatcher = self.multi_dim_dispatcher
+        return dispatcher.fit(train_data, mode='custom_fit')
+
+    def predict(self, trained_operation, predict_data: InputData, output_mode: str = 'default'):
+        """Predict through the dispatcher in ``'feature_extraction'`` mode."""
+        dispatcher = self.multi_dim_dispatcher
+        return dispatcher.predict(trained_operation, predict_data,
+                                  output_mode=output_mode, mode='feature_extraction')
+
+    def predict_for_fit(self, trained_operation, predict_data: InputData, output_mode: str = 'default') -> OutputData:
+        """Fit-stage prediction through the dispatcher in ``'feature_extraction'`` mode."""
+        dispatcher = self.multi_dim_dispatcher
+        return dispatcher.predict_for_fit(trained_operation, predict_data,
+                                          output_mode=output_mode, mode='feature_extraction')
+
+
+class IndustrialPreprocessingStrategy(IndustrialCustomPreprocessingStrategy):
     __operations_by_types = INDUSTRIAL_PREPROC_MODEL
 
     def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
@@ -334,7 +368,6 @@ def predict(self, trained_operation, predict_data: InputData, output_mode: str =
             :param output_mode:
         """
         prediction = trained_operation.transform(predict_data)
-        # Convert prediction to output (if it is required)
         converted = self.multi_dim_dispatcher._convert_to_output(prediction, predict_data)
         return converted
 
@@ -353,26 +386,6 @@ def predict_for_fit(self, trained_operation, predict_data: InputData, output_mod
         return converted
 
 
-class IndustrialCustomPreprocessingStrategy(FedotPreprocessingStrategy):
-    _operations_by_types = FEDOT_PREPROC_MODEL
-
-    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
-        self.operation_impl = self._convert_to_operation(operation_type)
-        self.multi_dim_dispatcher = MultiDimPreprocessingStrategy(self.operation_impl, operation_type)
-        super().__init__(operation_type, params)
-
-    def fit(self, train_data: InputData):
-        return self.multi_dim_dispatcher.fit(train_data, mode='custom_fit')
-
-    def predict(self, trained_operation, predict_data: InputData, output_mode: str = 'default'):
-        return self.multi_dim_dispatcher.predict(trained_operation, predict_data,
-                                                 mode='feature_extraction', output_mode=output_mode)
-
-    def predict_for_fit(self, trained_operation, predict_data: InputData, output_mode: str = 'default') -> OutputData:
-        return self.multi_dim_dispatcher.predict_for_fit(trained_operation, predict_data,
-                                                         mode='feature_extraction', output_mode=output_mode)
-
-
 class IndustrialClassificationPreprocessingStrategy(IndustrialCustomPreprocessingStrategy):
     """ Strategy for applying custom algorithms from FEDOT to preprocess data
     for classification task
@@ -389,7 +402,7 @@ def fit(self, train_data: InputData):
         return self.multi_dim_dispatcher.fit(train_data, mode='feature_extraction')
 
 
-class IndustrialSkLearnEvaluationStrategy(EvaluationStrategy):
+class IndustrialSkLearnEvaluationStrategy(IndustrialCustomPreprocessingStrategy):
 
     def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
         self.operation_impl = self._convert_to_operation(operation_type)

diff --git a/fedot_ind/core/repository/initializer_industrial_models.py b/fedot_ind/core/repository/initializer_industrial_models.py
@@ -6,6 +6,7 @@
 from typing import List, Iterable, Union, Optional
 
 import numpy as np
+from fedot.core.composer.metrics import QualityMetric, from_maximised_metric, F1
 
 from fedot.core.data.array_utilities import atleast_4d
 
@@ -35,6 +36,13 @@
 from fedot_ind.core.repository.model_repository import INDUSTRIAL_PREPROC_MODEL, AtomizedModel
 from fedot_ind.core.tuning.search_space import get_industrial_search_space
 
+import numpy as np
+from sklearn.metrics import f1_score
+
+from fedot.core.data.data import InputData, OutputData
+from fedot.core.pipelines.pipeline import Pipeline
+from fedot.core.repository.tasks import TaskTypesEnum
+
 
 class MutationStrengthEnumIndustrial(Enum):
     weak = 1.0
@@ -355,6 +363,49 @@ def merge_predicts(*args) -> np.array:
         return np.concatenate(predicts, axis=1)
 
 
+@staticmethod
+@from_maximised_metric
+def metric(reference: InputData, predicted: OutputData) -> float:
+    """Compute F1 between ``reference.target`` and ``predicted.predict``.
+
+    With more than two classes the score is ``'weighted'``-averaged. Otherwise
+    ``'binary'`` averaging is used and the least frequent value found in
+    ``reference.target`` is the positive label. If ``f1_score`` raises on that
+    first attempt, the score is recomputed with ``'weighted'`` averaging; a
+    prediction with more columns than the target is first reduced to column
+    indices with ``argmax``.
+
+    NOTE(review): ``setup_repository`` attaches this function to ``F1`` under
+    the attribute name ``"merge_predicts"``; ``"metric"`` looks like the
+    intended name - confirm against the FEDOT ``F1`` class.
+
+    :param reference: data whose ``target`` holds the true labels
+    :param predicted: data whose ``predict`` holds the predictions; the object
+        is left unmodified
+    :return: the value computed by ``f1_score`` (before the
+        ``from_maximised_metric`` decorator is applied)
+    """
+    y_true = reference.target
+    y_pred = predicted.predict
+    weighted_params = {'average': 'weighted'}
+    if reference.num_classes > 2:
+        additional_params = weighted_params
+    else:
+        # The rarest target value is the positive class for binary averaging.
+        values, counts = np.unique(np.ravel(y_true), return_counts=True)
+        pos_label = values[np.argsort(counts)[0]].item()
+        additional_params = {'average': 'binary', 'pos_label': pos_label}
+    try:
+        return f1_score(y_true=y_true, y_pred=y_pred, **additional_params)
+    except Exception:
+        # Deliberate best-effort retry; a second failure propagates to the caller.
+        # Rebind a local name instead of assigning to ``predicted.predict``, and
+        # guard the column lookups so 1-D arrays do not raise IndexError here.
+        target_columns = np.shape(y_true)[1] if np.ndim(y_true) > 1 else 1
+        if np.ndim(y_pred) > 1 and np.shape(y_pred)[1] > target_columns:
+            y_pred = np.argmax(y_pred, axis=1)
+        return f1_score(y_true=y_true, y_pred=y_pred, **weighted_params)
+
+
 class IndustrialModels:
     def __init__(self):
         self.industrial_data_operation_path = pathlib.Path(PROJECT_PATH, 'fedot_ind',
@@ -389,9 +423,9 @@ def setup_repository(self):
         setattr(ApiParamsRepository, "_get_default_mutations", _get_default_industrial_mutations)
         setattr(ImageDataMerger, "preprocess_predicts", preprocess_predicts)
         setattr(ImageDataMerger, "merge_predicts", merge_predicts)
+        setattr(F1, "merge_predicts", metric)
         class_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
         MutationStrengthEnum = MutationStrengthEnumIndustrial
-        # common_rules.append(has_no_data_flow_conflicts_in_industrial_pipeline)
         return OperationTypesRepository
 
     def __enter__(self):

diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py
@@ -107,7 +107,17 @@ def get_industrial_search_space(self):
             'C': {
                 'hyperopt-dist': hp.uniform,
                 'sampling-scope': [1e-2, 10.0],
-                'type': 'continuous'}
+                'type': 'continuous'},
+
+            'penalty': {
+                'hyperopt-dist': hp.choice,
+                'sampling-scope': [['l1', 'l2', 'elasticnet']],
+                'type': 'categorical'},
+
+            'solver': {
+                'hyperopt-dist': hp.choice,
+                'sampling-scope': [['lbfgs', 'newton-cg', 'newton-cholesky', 'saga']],
+                'type': 'categorical'}
         },
         'rf': {
             'criterion': {