From 49d0570330408700a89905475c03a71293cb4f3a Mon Sep 17 00:00:00 2001 From: leostre Date: Mon, 15 Jul 2024 12:30:35 +0300 Subject: [PATCH] refactored train loop + microfixes --- .../core/metrics/metrics_implementation.py | 122 +++++++++++++++++- fedot_ind/core/models/early_tc/ecec.py | 6 +- fedot_ind/core/models/early_tc/economy_k.py | 3 +- .../core/models/early_tc/prob_threshold.py | 3 +- fedot_ind/core/models/early_tc/teaser.py | 5 +- .../models/nn/network_impl/base_nn_model.py | 117 ++++++++++------- .../core/models/nn/network_impl/mlstm.py | 97 +++++--------- 7 files changed, 227 insertions(+), 126 deletions(-) diff --git a/fedot_ind/core/metrics/metrics_implementation.py b/fedot_ind/core/metrics/metrics_implementation.py index fea9c2877..8803f18dd 100644 --- a/fedot_ind/core/metrics/metrics_implementation.py +++ b/fedot_ind/core/metrics/metrics_implementation.py @@ -1,6 +1,7 @@ from typing import Optional from typing import Union +import matplotlib.pyplot as plt import numpy as np import pandas as pd from fedot.core.data.data import InputData @@ -220,6 +221,9 @@ def smape(a, f, _=None): return 1 / len(a) * np.sum(2 * np.abs(f - a) / (np.abs(a) + np.abs(f)) * 100) +def rmse(y_true, y_pred): + return np.sqrt(mean_squared_error(y_true, y_pred)) + def mape(A, F): return mean_absolute_percentage_error(A, F) @@ -232,9 +236,6 @@ def calculate_regression_metric(target, **kwargs): target = target.astype(float) - def rmse(y_true, y_pred): - return np.sqrt(mean_squared_error(y_true, y_pred)) - metric_dict = {'r2': r2_score, 'mse': mean_squared_error, 'rmse': rmse, @@ -261,9 +262,6 @@ def calculate_forecasting_metric(target, **kwargs): target = target.astype(float) - def rmse(y_true, y_pred): - return np.sqrt(mean_squared_error(y_true, y_pred)) - metric_dict = { 'rmse': rmse, 'mae': mean_absolute_error, @@ -345,10 +343,95 @@ def kl_divergence(solution: pd.DataFrame, return np.average(solution.sum(axis=1), weights=sample_weights) else: return np.average(solution.mean()) + +class ETSCPareto(QualityMetric, ParetoMetrics): + def __init__(self, + target, + predicted_labels, + predicted_probs=None, + weigths: tuple = None, + mode: str = 'robust', + reduce: bool = True, + metric_list: tuple = ( + 'f1', 'roc_auc', 'accuracy', 'logloss', 'precision'), + default_value: float = 0.0): + self.target = target.flatten() + self.predicted_labels = predicted_labels + self.predicted_probs = predicted_probs + self.metric_list = metric_list + self.default_value = default_value + self.weights = weigths + self.mode = mode + self.columns = ['robustness'] if self.mode == 'robust' else [] + self.columns.extend(metric_list) + self.reduce = reduce + def metric(self) -> float: + if len(self.predicted_labels.shape) == 1: + self.predicted_labels = self.predicted_labels[None, ...] + self.predicted_probs = self.predicted_probs[None, ...] + + n_metrics = len(self.metric_list) + (self.mode == 'robust') + n_est = self.predicted_labels.shape[0] + result = np.zeros((n_est, n_metrics)) + if self.mode == 'robust': + mask = self.predicted_probs >= 0 + if not mask.any(): + return result + robustness = mask.sum(-1) / self.predicted_probs.shape[-1] + result[:, 0] = robustness.flatten() + else: + mask = np.ones_like(self.predicted_probs, dtype=bool) + + for est in range(n_est): + for i, metric in enumerate(self.metric_list, 1): + assert metric in CLASSIFICATION_METRIC_DICT, f'{metric} is not found in available metrics' + metric_value = CLASSIFICATION_METRIC_DICT[metric](self.target[mask[est]], + self.predicted_labels[est][mask[est]]) + result[est, i] = metric_value + + if self.weights is None: + if self.reduce: + self.weights = np.empty(n_metrics) + self.weights.fill(1 / len(self.weights)) + else: + self.weights = np.eye(n_metrics) + else: + assert self.weights.shape[-1] == self.metrics.shape[-1], 'Metrics and weights size mismatch!' + self.weights /= self.weights.sum() + + result = result @ self.weights.T + if not self.reduce: + return pd.DataFrame(result, columns=self.columns) + else: + return result + + def plot_bicrit_metric(self, metrics, select=None, metrics_names=None): + if not metrics_names: + metrics_names = ('Robustness', 'Accuracy') + plt.figure(figsize=(10, 10)) + assert metrics.shape[-1] == 2, 'only 2 metrics can be plotted' + for i, metric in enumerate(metrics): + selection = metric[select] + sizes = ((np.arange(selection.shape[0]) * 2)[::-1]) ** 1.5 + 10 + plt.scatter(*(metric[select]).T, + s=sizes, + label=i) + plt.legend(loc="upper right", bbox_to_anchor=(1.5, 1)) + plt.ylabel(metrics_names[1]) + plt.xlabel(metrics_names[0]) + plt.xlim((-0.05, 1.05)) + plt.ylim((-0.05, 1.05)) + plt.xticks(np.linspace(0, 1, 11)) + plt.yticks(np.linspace(0, 1, 11)) + plt.grid(True) + + def select_pareto_front(self, metrics, maximize=True): + pareto_mask = self.pareto_metric_list(metrics, maximise=maximize) + return metrics[pareto_mask] -class AnomalyMetric(QualityMetric): +class AnomalyMetric(QualityMetric): def __init__(self, target, predicted_labels, @@ -617,3 +700,28 @@ def calculate_detection_metric( target=target, predicted_labels=labels).metric() return metric_dict + +REGRESSION_METRIC_DICT = {'r2': r2_score, + 'mse': mean_squared_error, + 'rmse': rmse, + 'mae': mean_absolute_error, + 'msle': mean_squared_log_error, + 'mape': mean_absolute_percentage_error, + 'median_absolute_error': median_absolute_error, + 'explained_variance_score': explained_variance_score, + 'max_error': max_error, + 'd2_absolute_error_score': d2_absolute_error_score} + +CLASSIFICATION_METRIC_DICT = {'accuracy': accuracy_score, + 'f1': f1_score, + 'roc_auc': roc_auc_score, + 'precision': precision_score, + 'logloss': log_loss} + +FORECASTING_METRICS_DICT = { + 'rmse': rmse, + 'mae': mean_absolute_error, + 'median_absolute_error': median_absolute_error, + 'smape': smape, + 'mase': mase + } diff --git a/fedot_ind/core/models/early_tc/ecec.py b/fedot_ind/core/models/early_tc/ecec.py index f6e163d25..792810ce6 100644 --- a/fedot_ind/core/models/early_tc/ecec.py +++ b/fedot_ind/core/models/early_tc/ecec.py @@ -1,9 +1,11 @@ from typing import Optional -from fedot_ind.core.architecture.settings.computational import backend_methods as np + from fedot.core.operations.operation_parameters import OperationParameters +from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.early_tc.base_early_tc import BaseETC -from sklearn.model_selection import cross_val_predict from sklearn.metrics import confusion_matrix +from sklearn.model_selection import cross_val_predict + class ECEC(BaseETC): def __init__(self, params: Optional[OperationParameters] = None): diff --git a/fedot_ind/core/models/early_tc/economy_k.py b/fedot_ind/core/models/early_tc/economy_k.py index c39097189..fae2f409b 100644 --- a/fedot_ind/core/models/early_tc/economy_k.py +++ b/fedot_ind/core/models/early_tc/economy_k.py @@ -1,6 +1,7 @@ from typing import Optional -from fedot_ind.core.architecture.settings.computational import backend_methods as np + from fedot.core.operations.operation_parameters import OperationParameters +from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.early_tc.base_early_tc import BaseETC from sklearn.cluster import KMeans from sklearn.metrics import confusion_matrix diff --git a/fedot_ind/core/models/early_tc/prob_threshold.py b/fedot_ind/core/models/early_tc/prob_threshold.py index 773f79d8e..fd1455a36 100644 --- a/fedot_ind/core/models/early_tc/prob_threshold.py +++ b/fedot_ind/core/models/early_tc/prob_threshold.py @@ -1,6 +1,7 @@ from typing import Optional -from fedot_ind.core.architecture.settings.computational import backend_methods as np + from fedot.core.operations.operation_parameters import OperationParameters +from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.early_tc.base_early_tc import BaseETC class ProbabilityThresholdClassifier(BaseETC): diff --git a/fedot_ind/core/models/early_tc/teaser.py b/fedot_ind/core/models/early_tc/teaser.py index 23d6c078d..2dc905508 100644 --- a/fedot_ind/core/models/early_tc/teaser.py +++ b/fedot_ind/core/models/early_tc/teaser.py @@ -1,6 +1,7 @@ from typing import Optional -from fedot_ind.core.architecture.settings.computational import backend_methods as np + from fedot.core.operations.operation_parameters import OperationParameters +from fedot_ind.core.architecture.settings.computational import backend_methods as np from fedot_ind.core.models.early_tc.base_early_tc import BaseETC from sklearn.model_selection import GridSearchCV from sklearn.svm import OneClassSVM @@ -43,7 +44,7 @@ def _form_X_oc(self, predicted_probas): def _predict(self, X, training=False): estimator_indices, offset = self._select_estimators(X) X_ocs, predicted_probas, predicted_labels = zip( - *[self._predict_one_slave(X, i, offset) for i in estimator_indices] # check boundary + *[self._predict_one_slave(X, i, offset) for i in estimator_indices] ) non_acceptance = self._consecutive_count(predicted_labels) < self.consecutive_predictions X_ocs = np.stack(X_ocs) diff --git a/fedot_ind/core/models/nn/network_impl/base_nn_model.py b/fedot_ind/core/models/nn/network_impl/base_nn_model.py index e9d6c7274..f285853d0 100644 --- a/fedot_ind/core/models/nn/network_impl/base_nn_model.py +++ b/fedot_ind/core/models/nn/network_impl/base_nn_model.py @@ -113,6 +113,69 @@ def _prepare_data(self, ts, split_data: bool = True, collate_fn=None): self.label_encoder = train_dataset.label_encoder return train_loader, val_loader + def _train_one_batch(self, batch, optimizer, loss_fn): + optimizer.zero_grad() + inputs, targets = batch + output = self.model(inputs) + loss = loss_fn(output, targets.float()) + loss.backward() + optimizer.step() + training_loss = loss.data.item() * inputs.size(0) + total = targets.size(0) + correct = (torch.argmax(output, 1) == + torch.argmax(targets, 1)).sum().item() + return training_loss, total, correct + + def _eval_one_batch(self, batch, loss_fn): + inputs, targets = batch + output = self.model(inputs) + loss = loss_fn(output, targets.float()) + valid_loss = loss.data.item() * inputs.size(0) + total = targets.size(0) + correct = (torch.argmax(output, 1) == + torch.argmax(targets, 1)).sum().item() + return valid_loss, total, correct + + def _run_one_epoch(self, train_loader, val_loader, + optimizer, loss_fn, + epoch, val_interval, + early_stopping, scheduler, + best_val_loss): + training_loss = 0.0 + valid_loss = 0.0 + self.model.train() + total = 0 + correct = 0 + best_model = self.model + for batch in tqdm(train_loader): + training_loss_batch, total_batch, correct_batch = self._train_one_batch(batch, optimizer, loss_fn) + training_loss += training_loss_batch + total += total_batch + correct += correct_batch + accuracy = correct / total + training_loss /= len(train_loader.dataset) + print('Epoch: {}, Accuracy = {}, Training Loss: {:.2f}'.format( + epoch, accuracy, training_loss)) + + if val_loader is not None and epoch % val_interval == 0: + self.model.eval() + total = 0 + correct = 0 + for batch in val_loader: + valid_loss_batch, total_batch, correct_batch = self._eval_one_batch(batch, loss_fn) + valid_loss += valid_loss_batch + total += total_batch + correct += correct_batch + if valid_loss < best_val_loss: + best_val_loss = valid_loss + best_model = copy.deepcopy(self.model) + + early_stopping(training_loss, self.model, './') + adjust_learning_rate(optimizer, scheduler, + epoch + 1, self.learning_rate, printout=False) + scheduler.step() + return best_model, best_val_loss + def _train_loop(self, train_loader, val_loader, loss_fn, optimizer): early_stopping = EarlyStopping() scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer, @@ -127,53 +190,13 @@ def _train_loop(self, train_loader, val_loader, loss_fn, optimizer): self.epochs, self.learning_rate) loss_prefix = 'RMSE' if self.is_regression_task else 'Accuracy' for epoch in range(1, self.epochs + 1): - training_loss = 0.0 - valid_loss = 0.0 - self.model.train() - total = 0 - correct = 0 - for batch in tqdm(train_loader): - optimizer.zero_grad() - inputs, targets = batch - output = self.model(inputs) - loss = loss_fn(output, targets.float()) - loss.backward() - optimizer.step() - training_loss += loss.data.item() / inputs.size(0) if self.is_regression_task \ - else loss.data.item() * inputs.size(0) - total += targets.size(0) - correct += (torch.argmax(output, 1) == torch.argmax(targets, 1)).sum().item() \ - if not self.is_regression_task else 0 - - training_loss = training_loss / len(train_loader.dataset) if not self.is_regression_task else training_loss - accuracy = correct / total if not self.is_regression_task else training_loss - print('Epoch: {}, {}= {}, Training Loss: {:.2f}'.format( - epoch, loss_prefix, accuracy, training_loss)) - - if val_loader is not None and epoch % val_interval == 0: - self.model.eval() - total = 0 - correct = 0 - for batch in val_loader: - inputs, targets = batch - output = self.model(inputs) - - loss = loss_fn(output, targets.float()) - - valid_loss += loss.data.item() / inputs.size(0) if self.is_regression_task \ - else loss.data.item() * inputs.size(0) - total += targets.size(0) - correct += (torch.argmax(output, 1) == torch.argmax(targets, 1)).sum().item() \ - if not self.is_regression_task else 0 - if valid_loss < best_val_loss: - best_val_loss = valid_loss - best_model = copy.deepcopy(self.model) - - early_stopping(training_loss, self.model, './') - adjust_learning_rate(optimizer, scheduler, - epoch + 1, self.learning_rate, printout=False) - scheduler.step() - + best_model, best_val_loss = self._run_one_epoch( + train_loader, val_loader, + optimizer, loss_fn, + epoch, val_interval, + early_stopping, scheduler, + best_val_loss + ) if early_stopping.early_stop: print("Early stopping") break diff --git a/fedot_ind/core/models/nn/network_impl/mlstm.py b/fedot_ind/core/models/nn/network_impl/mlstm.py index 3e1d3c4b5..604f28660 100644 --- a/fedot_ind/core/models/nn/network_impl/mlstm.py +++ b/fedot_ind/core/models/nn/network_impl/mlstm.py @@ -154,74 +154,39 @@ def _moving_window_output(self, inputs): batch_interval = inputs[..., i - self.prediction_idx[0] : i + 1] output, hidden_state = self.model(batch_interval, hidden_state, return_hidden=True) return output - - def _train_loop(self, train_loader, val_loader, loss_fn, optimizer): - early_stopping = EarlyStopping() - scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer, - steps_per_epoch=len(train_loader), - epochs=self.epochs, - max_lr=self.learning_rate) - if val_loader is None: - print('Not enough class samples for validation') - - best_model = None - best_val_loss = float('inf') - val_interval = self.get_validation_frequency( - self.epochs, self.learning_rate) - - for epoch in range(1, self.epochs + 1): - training_loss = 0.0 - valid_loss = 0.0 - self.model.train() - total = 0 - correct = 0 - for batch in tqdm(train_loader): - optimizer.zero_grad() - inputs, targets = batch - output = self._moving_window_output(inputs) - loss = loss_fn(output, targets.float()) - loss.backward() - optimizer.step() - training_loss += loss.data.item() * inputs.size(0) - total += targets.size(0) - correct += (torch.argmax(output, 1) == + + def _train_one_batch(self, batch, optimizer, loss_fn): + if self.fitting_mode == 'zero_padding': + return super()._train_one_batch(batch, optimizer, loss_fn) + elif self.fitting_mode == 'moving_window': + optimizer.zero_grad() + inputs, targets = batch + output = self._moving_window_output(inputs) + loss = loss_fn(output, targets.float()) + loss.backward() + optimizer.step() + training_loss = loss.data.item() * inputs.size(0) + total = targets.size(0) + correct = (torch.argmax(output, 1) == torch.argmax(targets, 1)).sum().item() - - accuracy = correct / total - training_loss /= len(train_loader.dataset) - print('Epoch: {}, Accuracy = {}, Training Loss: {:.2f}'.format( - epoch, accuracy, training_loss)) - - if val_loader is not None and epoch % val_interval == 0: - self.model.eval() - total = 0 - correct = 0 - for batch in val_loader: - inputs, targets = batch - - output = self.model(inputs) - - loss = loss_fn(output, targets.float()) - - valid_loss += loss.data.item() * inputs.size(0) - total += targets.size(0) - correct += (torch.argmax(output, 1) == + return training_loss, total, correct + else: + raise ValueError('Unknown fitting mode!') + + def _eval_one_batch(self, batch, loss_fn): + if self.fitting_mode == 'zero_padding': + return super()._eval_one_batch(batch, loss_fn) + elif self.fitting_mode == 'moving_window': + inputs, targets = batch + output = self._moving_window_output(inputs) + loss = loss_fn(output, targets.float()) + valid_loss = loss.data.item() * inputs.size(0) + total = targets.size(0) + correct = (torch.argmax(output, 1) == torch.argmax(targets, 1)).sum().item() - if valid_loss < best_val_loss: - best_val_loss = valid_loss - best_model = copy.deepcopy(self.model) - - early_stopping(training_loss, self.model, './') - adjust_learning_rate(optimizer, scheduler, - epoch + 1, self.learning_rate, printout=False) - scheduler.step() - - if early_stopping.early_stop: - print("Early stopping") - break - - if best_model is not None: - self.model = best_model + return valid_loss, total, correct + else: + raise ValueError('Unknown fitting mode!') @convert_to_3d_torch_array def _predict_model(self, x_test: InputData, output_mode: str = 'default'):