small fix of get_metrics() method
technocreep committed Jan 24, 2024
1 parent 3be47b8 commit 974e41f
Showing 3 changed files with 123 additions and 91 deletions.
@@ -16,7 +16,9 @@
model = industrial.fit(train_data)

labels = industrial.predict(test_data)
# probas = industrial.predict_proba(test_data)

probs = industrial.predict_proba(test_data)
metrics = industrial.get_metrics(target=test_data[1],
rounding_order=3,
metric_names=['f1', 'accuracy', 'precision', 'roc_auc'])
# industrial.finetune(train_data)
print(classification_report(test_data[1], labels, digits=4))
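A minimal sketch of consuming the result of the updated call above, assuming `metrics` is the DataFrame returned by `get_metrics()` (the column set follows whatever was passed in `metric_names`; the continuation below is illustrative, not part of the commit):

# get_metrics() returns a one-row pandas DataFrame with a column per requested
# metric, already rounded to `rounding_order` digits.
print(metrics)                    # columns: f1, accuracy, precision, roc_auc
f1_value = metrics.loc[0, 'f1']   # pull a single metric out as a scalar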
100 changes: 63 additions & 37 deletions fedot_ind/api/main.py
@@ -1,12 +1,11 @@
import logging
import warnings
from pathlib import Path

import pandas as pd
from fedot.api.main import Fedot
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.metrics_repository import ClassificationMetricsEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from fedot_ind.api.utils.checkers_collections import DataCheck
@@ -16,15 +15,13 @@
from fedot_ind.core.ensemble.random_automl_forest import RAFensembler
from fedot_ind.core.operation.transformation.splitter import TSTransformer
from fedot_ind.core.optimizer.IndustrialEvoOptimizer import IndustrialEvoOptimizer
from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_WORKER_NUM, \
FEDOT_WORKER_TIMEOUT_PARTITION, FEDOT_GET_METRICS, FEDOT_TUNING_METRICS, FEDOT_HEAD_ENSEMBLE, \
FEDOT_ATOMIZE_OPERATION, FEDOT_ASSUMPTIONS
from fedot_ind.core.repository.constanst_repository import BATCH_SIZE_FOR_FEDOT_WORKER, FEDOT_ASSUMPTIONS, \
FEDOT_GET_METRICS, FEDOT_HEAD_ENSEMBLE, FEDOT_TUNING_METRICS, FEDOT_WORKER_NUM, FEDOT_WORKER_TIMEOUT_PARTITION
from fedot_ind.core.repository.initializer_industrial_models import IndustrialModels
from fedot_ind.core.repository.model_repository import default_industrial_availiable_operation
from fedot_ind.tools.explain.explain import PointExplainer
from fedot_ind.tools.synthetic.anomaly_generator import AnomalyGenerator
from fedot_ind.tools.synthetic.ts_generator import TimeSeriesGenerator
import warnings

warnings.filterwarnings("ignore")

@@ -87,6 +84,9 @@ def __init__(self, **kwargs):
super(Fedot, self).__init__()
self.logger = logging.getLogger('FedotIndustrialAPI')

self.solver = None
self.predicted_labels = None
self.predicted_probs = None
self.predict_data = None
self.config_dict = None
self.ensemble_solver = None
@@ -179,7 +179,8 @@ def predict(self,
"""
self.predict_data = DataCheck(input_data=predict_data, task=self.config_dict['problem']).check_input_data()
predict = self.solver.predict(self.predict_data)
return predict if isinstance(self.solver, Fedot) else predict.predict
self.predicted_labels = predict if isinstance(self.solver, Fedot) else predict.predict
return self.predicted_labels

def predict_proba(self,
predict_data,
@@ -196,15 +197,15 @@ def predict_proba(self,
"""
self.predict_data = DataCheck(input_data=predict_data, task=self.config_dict['problem']).check_input_data()
proba = self.solver.predict_proba(self.predict_data)
return proba if isinstance(self.solver, Fedot) else proba.predict_proba

probs = self.solver.predict_proba(self.predict_data)
self.predicted_probs = probs if isinstance(self.solver, Fedot) else probs.predict_proba
return self.predicted_probs

def finetune(self,
train_data,
tuning_params=None,
mode: str = 'full'):
"""
Method for hyperparameter tuning of an already fitted Industrial model.
Args:
@@ -214,28 +215,53 @@
"""

train_data = DataCheck(input_data=train_data, task=self.config_dict['problem']).check_input_data()
if tuning_params is None:
tuning_params = {}
metric = FEDOT_TUNING_METRICS[self.config_dict['problem']]
pipeline_tuner = TunerBuilder(train_data.task) \
.with_tuner(SimultaneousTuner) \
.with_metric(metric) \
.with_timeout(tuning_params.get('tuning_timeout', 2)) \
.with_early_stopping_rounds(tuning_params.get('tuning_early_stop', 5)) \
.with_iterations(tuning_params.get('tuning_iterations', 10)) \
.build(train_data)
if mode == 'full':
batch_pipelines = [automl_branch for automl_branch in self.solver.current_pipeline.nodes if
automl_branch.name in FEDOT_HEAD_ENSEMBLE]
for b_pipeline in batch_pipelines:
b_pipeline.fitted_operation.current_pipeline = pipeline_tuner.tune(b_pipeline.fitted_operation.current_pipeline)
b_pipeline.fitted_operation.current_pipeline.fit(train_data)
pipeline_tuner.tune(self.solver.current_pipeline)
self.solver.current_pipeline.fit(train_data)

def get_metrics(self, target, labels, probs) -> dict:
return FEDOT_GET_METRICS[self.config_dict['problem']](target, labels, probs)
train_data = DataCheck(input_data=train_data, task=self.config_dict['problem']).check_input_data()
if tuning_params is None:
tuning_params = {}
metric = FEDOT_TUNING_METRICS[self.config_dict['problem']]
pipeline_tuner = TunerBuilder(train_data.task) \
.with_tuner(SimultaneousTuner) \
.with_metric(metric) \
.with_timeout(tuning_params.get('tuning_timeout', 2)) \
.with_early_stopping_rounds(tuning_params.get('tuning_early_stop', 5)) \
.with_iterations(tuning_params.get('tuning_iterations', 10)) \
.build(train_data)
if mode == 'full':
batch_pipelines = [automl_branch for automl_branch in self.solver.current_pipeline.nodes if
automl_branch.name in FEDOT_HEAD_ENSEMBLE]
for b_pipeline in batch_pipelines:
b_pipeline.fitted_operation.current_pipeline = pipeline_tuner.tune(
b_pipeline.fitted_operation.current_pipeline)
b_pipeline.fitted_operation.current_pipeline.fit(train_data)
pipeline_tuner.tune(self.solver.current_pipeline)
self.solver.current_pipeline.fit(train_data)

def get_metrics(self, target=None,
metric_names=None,
rounding_order=3,
**kwargs) -> pd.DataFrame:
"""
Method to calculate metrics for Industrial model.
Available metrics for classification task: 'f1', 'accuracy', 'precision', 'roc_auc', 'log_loss'.
Available metrics for regression task: 'r2', 'rmse', 'mse', 'mae', 'median_absolute_error',
'explained_variance_score', 'max_error', 'd2_absolute_error_score', 'msle', 'mape'.
Args:
target (np.ndarray): target values
metric_names (list): list of metric names
rounding_order (int): rounding order for metrics
Returns:
pandas DataFrame with calculated metrics
"""
return FEDOT_GET_METRICS[self.config_dict['problem']](target=target,
metric_names=metric_names,
rounding_order=rounding_order,
labels=self.predicted_labels,
probs=self.predicted_probs)

def save_predict(self, predicted_data, **kwargs) -> None:
"""
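Taken together, the main.py changes imply a particular call order: predict() and predict_proba() now cache their outputs, and get_metrics() reads those cached values instead of taking labels/probs arguments. A minimal sketch under that assumption (variable names are illustrative; `industrial` is a configured FedotIndustrial instance and `train_data`/`test_data` are (features, target) pairs as in the example file above):

model = industrial.fit(train_data)
labels = industrial.predict(test_data)        # stores self.predicted_labels
probs = industrial.predict_proba(test_data)   # stores self.predicted_probs
# get_metrics() forwards the cached labels/probs to FEDOT_GET_METRICS, so the
# two predict calls must happen first; probability-based metrics such as
# 'roc_auc' or 'logloss' need predict_proba() to have run.
metrics = industrial.get_metrics(target=test_data[1],
                                 metric_names=['f1', 'roc_auc', 'accuracy'],
                                 rounding_order=3)
# Optional extra tuning pass; the keys and values mirror the defaults read by
# finetune() in the diff above.
industrial.finetune(train_data,
                    tuning_params={'tuning_timeout': 2,
                                   'tuning_early_stop': 5,
                                   'tuning_iterations': 10},
                    mode='full')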
108 changes: 56 additions & 52 deletions fedot_ind/core/metrics/metrics_implementation.py
@@ -1,20 +1,17 @@
from typing import Union
from sklearn.metrics import explained_variance_score, max_error, mean_absolute_error, \
mean_squared_error, d2_absolute_error_score, \
median_absolute_error, r2_score
from fedot_ind.core.architecture.settings.computational import backend_methods as np

import pandas as pd
from sklearn.metrics import (accuracy_score, f1_score,
log_loss, mean_absolute_error,
mean_absolute_percentage_error,
mean_squared_error, mean_squared_log_error,
precision_score, r2_score, roc_auc_score)
from sklearn.metrics import d2_absolute_error_score, explained_variance_score, max_error, median_absolute_error

from fedot_ind.core.architecture.settings.computational import backend_methods as np

class ParetoMetrics:
def __init__(self):
pass

class ParetoMetrics:
def pareto_metric_list(self, costs: Union[list, np.ndarray], maximise: bool = True) -> np.ndarray:
""" Calculates the pareto front for a list of costs.
@@ -43,7 +40,7 @@ def __init__(self, target,
predicted_labels,
predicted_probs=None,
metric_list: list = (
'f1', 'roc_auc', 'accuracy', 'logloss', 'precision'),
default_value: float = 0.0):
self.predicted_probs = predicted_probs
self.predicted_labels = np.array(predicted_labels).flatten()
@@ -77,16 +74,13 @@ def metric(self) -> float:

class F1(QualityMetric):
def metric(self) -> float:
target = self.target
prediction = self.predicted_labels
self.default_value = 0.0
n_classes = len(np.unique(target))
n_classes_pred = len(np.unique(prediction))
n_classes = len(np.unique(self.target))
n_classes_pred = len(np.unique(self.predicted_labels))
try:
if n_classes > 2 or n_classes_pred > 2:
return f1_score(y_true=target, y_pred=prediction, average='weighted')
return f1_score(y_true=self.target, y_pred=self.predicted_labels, average='weighted')
else:
return f1_score(y_true=target, y_pred=prediction, average='binary')
return f1_score(y_true=self.target, y_pred=self.predicted_labels, average='binary')
except ValueError:
return self.default_value

@@ -105,7 +99,6 @@ class ROCAUC(QualityMetric):
def metric(self) -> float:
n_classes = len(np.unique(self.target))

self.default_value = 0.5
if n_classes > 2:
target = pd.get_dummies(self.target)
additional_params = {'multi_class': 'ovr', 'average': 'macro'}
@@ -127,55 +120,66 @@ def metric(self) -> float:

class Precision(QualityMetric):
def metric(self) -> float:
target = self.target
prediction = self.predicted_labels

n_classes = np.unique(target)
n_classes = np.unique(self.target)
if n_classes.shape[0] >= 2:
additional_params = {'average': 'macro'}
else:
additional_params = {}

score = precision_score(
y_pred=prediction, y_true=target, **additional_params)
y_pred=self.predicted_labels, y_true=self.target, **additional_params)
score = round(score, 3)
return score


class Logloss(QualityMetric):
def metric(self) -> float:
target = self.target
prediction = self.predicted_probs
return log_loss(y_true=target, y_pred=prediction)
return log_loss(y_true=self.target, y_pred=self.predicted_probs)


class Accuracy(QualityMetric):
def metric(self) -> float:
target = self.target
prediction = self.predicted_labels
return accuracy_score(y_true=target, y_pred=prediction)


def calculate_regression_metric(test_target, labels):
test_target = test_target.astype(float)
metric_dict = {'r2_score:': r2_score(test_target, labels),
'mean_squared_error:': mean_squared_error(test_target, labels),
'root_mean_squared_error:': np.sqrt(mean_squared_error(test_target, labels)),
'mean_absolute_error': mean_absolute_error(test_target, labels),
'median_absolute_error': median_absolute_error(test_target, labels),
'explained_variance_score': explained_variance_score(test_target, labels),
'max_error': max_error(test_target, labels),
'd2_absolute_error_score': d2_absolute_error_score(test_target, labels)
}
df = pd.DataFrame.from_dict(metric_dict, orient='index')
return df


def calculate_classification_metric(test_target, labels, probs):

metric_dict = {'accuracy:': Accuracy(test_target, labels, probs).metric(),
'f1': F1(test_target, labels, probs).metric(),
'roc_auc:': ROCAUC(test_target, labels, probs).metric()
}
df = pd.DataFrame.from_dict(metric_dict, orient='index')
return df
return accuracy_score(y_true=self.target, y_pred=self.predicted_labels)


def calculate_regression_metric(target,
labels,
rounding_order=3,
metric_names=('r2', 'rmse', 'mae')):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {'r2': r2_score,
'mse': mean_squared_error,
'rmse': rmse,
'mae': mean_absolute_error,
'msle': mean_squared_log_error,
'mape': mean_absolute_percentage_error,
'median_absolute_error': median_absolute_error,
'explained_variance_score': explained_variance_score,
'max_error': max_error,
'd2_absolute_error_score': d2_absolute_error_score}

df = pd.DataFrame({name: func(target, labels) for name, func in metric_dict.items()
if name in metric_names},
index=[0])
return df.round(rounding_order)


def calculate_classification_metric(target,
labels,
probs,
rounding_order=3,
metric_names=('f1', 'roc_auc', 'accuracy')):
metric_dict = {'accuracy': Accuracy,
'f1': F1,
'roc_auc': ROCAUC,
'precision': Precision,
'logloss': Logloss}

df = pd.DataFrame({name: func(target, labels, probs).metric() for name, func in metric_dict.items()
if name in metric_names},
index=[0])
return df.round(rounding_order)
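The two rewritten helpers can also be exercised on their own. A small sketch with made-up arrays (the import path follows the file shown in this diff; both functions return a one-row DataFrame rounded to `rounding_order`):

import numpy as np
from fedot_ind.core.metrics.metrics_implementation import (
    calculate_classification_metric, calculate_regression_metric)

# Toy binary-classification case (illustrative data only).
target = np.array([0, 1, 1, 0, 1])
labels = np.array([0, 1, 0, 0, 1])
probs = np.array([0.2, 0.9, 0.4, 0.1, 0.8])
clf_df = calculate_classification_metric(target, labels, probs,
                                          metric_names=('f1', 'accuracy'))

# Toy regression case.
y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.2])
reg_df = calculate_regression_metric(y_true, y_pred,
                                      metric_names=('r2', 'rmse', 'mae'))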
