Skip to content

Commit

Permalink
refactored train loop + microfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
leostre committed Jul 15, 2024
1 parent 926eb92 commit 49d0570
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 126 deletions.
122 changes: 115 additions & 7 deletions fedot_ind/core/metrics/metrics_implementation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fedot.core.data.data import InputData
Expand Down Expand Up @@ -220,6 +221,9 @@ def smape(a, f, _=None):
return 1 / len(a) * np.sum(2 * np.abs(f - a) /
(np.abs(a) + np.abs(f)) * 100)

def rmse(y_true, y_pred):
    """Root mean squared error: the square root of sklearn's MSE."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)


def mape(A, F):
    """Mean absolute percentage error (thin wrapper around sklearn)."""
    score = mean_absolute_percentage_error(A, F)
    return score
Expand All @@ -232,9 +236,6 @@ def calculate_regression_metric(target,
**kwargs):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {'r2': r2_score,
'mse': mean_squared_error,
'rmse': rmse,
Expand All @@ -261,9 +262,6 @@ def calculate_forecasting_metric(target,
**kwargs):
target = target.astype(float)

def rmse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {
'rmse': rmse,
'mae': mean_absolute_error,
Expand Down Expand Up @@ -345,10 +343,95 @@ def kl_divergence(solution: pd.DataFrame,
return np.average(solution.sum(axis=1), weights=sample_weights)
else:
return np.average(solution.mean())

class ETSCPareto(QualityMetric, ParetoMetrics):
    """Multi-criteria quality metric for early time-series classification.

    Scores one or several estimators with a tuple of classification metrics
    and, in ``'robust'`` mode, prepends a "robustness" column — the share of
    samples whose predicted probability is non-negative (accepted
    predictions). The per-metric matrix is then either reduced to a single
    weighted score per estimator or returned as a table for Pareto analysis.
    """

    def __init__(self,
                 target,
                 predicted_labels,
                 predicted_probs=None,
                 weigths: tuple = None,
                 mode: str = 'robust',
                 reduce: bool = True,
                 metric_list: tuple = (
                     'f1', 'roc_auc', 'accuracy', 'logloss', 'precision'),
                 default_value: float = 0.0):
        # NOTE(review): 'weigths' is a misspelling of 'weights' but is part of
        # the public keyword interface, so it is kept for compatibility.
        self.target = target.flatten()
        self.predicted_labels = predicted_labels
        self.predicted_probs = predicted_probs
        self.metric_list = metric_list
        self.default_value = default_value
        self.weights = weigths
        self.mode = mode
        # The first output column is robustness only in 'robust' mode.
        self.columns = ['robustness'] if self.mode == 'robust' else []
        self.columns.extend(metric_list)
        self.reduce = reduce

    def metric(self) -> float:
        """Compute the (weighted) metric matrix for all estimators.

        Returns a 1-D array of weighted scores (``reduce=True``), a
        ``pd.DataFrame`` of per-metric values (``reduce=False``), or a zero
        matrix when 'robust' mode finds no accepted predictions.
        """
        # Promote single-estimator 1-D predictions to a (1, n_samples) batch.
        if len(self.predicted_labels.shape) == 1:
            self.predicted_labels = self.predicted_labels[None, ...]
            self.predicted_probs = self.predicted_probs[None, ...]

        n_metrics = len(self.metric_list) + (self.mode == 'robust')
        n_est = self.predicted_labels.shape[0]
        result = np.zeros((n_est, n_metrics))
        if self.mode == 'robust':
            # Negative probabilities mark rejected (not-yet-accepted) samples.
            mask = self.predicted_probs >= 0
            if not mask.any():
                # NOTE(review): returns the raw zero matrix, unlike the
                # weighted/DataFrame returns below — confirm callers accept it.
                return result
            robustness = mask.sum(-1) / self.predicted_probs.shape[-1]
            result[:, 0] = robustness.flatten()
        else:
            mask = np.ones_like(self.predicted_probs, dtype=bool)

        # Metric columns start after the optional robustness column. The
        # previous code always started at column 1, which overflowed `result`
        # (IndexError) whenever mode != 'robust'.
        first_col = 1 if self.mode == 'robust' else 0
        for est in range(n_est):
            for i, metric in enumerate(self.metric_list, first_col):
                assert metric in CLASSIFICATION_METRIC_DICT, f'{metric} is not found in available metrics'
                metric_value = CLASSIFICATION_METRIC_DICT[metric](self.target[mask[est]],
                                                                  self.predicted_labels[est][mask[est]])
                result[est, i] = metric_value

        if self.weights is None:
            if self.reduce:
                # Equal weights: reduce to one weighted score per estimator.
                self.weights = np.full(n_metrics, 1.0 / n_metrics)
            else:
                # Identity matrix keeps per-metric columns intact.
                self.weights = np.eye(n_metrics)
        else:
            # Accept any array-like (the parameter is annotated as a tuple,
            # which has no `.shape`). The previous assert compared against the
            # nonexistent `self.metrics` attribute and raised AttributeError.
            weights = np.asarray(self.weights, dtype=float)
            assert weights.shape[-1] == result.shape[-1], 'Metrics and weights size mismatch!'
            self.weights = weights / weights.sum()

        result = result @ self.weights.T
        if not self.reduce:
            return pd.DataFrame(result, columns=self.columns)
        else:
            return result

    def plot_bicrit_metric(self, metrics, select=None, metrics_names=None):
        """Scatter-plot two metrics against each other per estimator.

        ``metrics`` must have exactly two columns; ``select`` optionally
        indexes the points shown for every estimator.
        """
        if not metrics_names:
            metrics_names = ('Robustness', 'Accuracy')
        plt.figure(figsize=(10, 10))
        assert metrics.shape[-1] == 2, 'only 2 metrics can be plotted'
        for i, metric in enumerate(metrics):
            selection = metric[select]
            # Earlier points are drawn larger so overlapping ones stay visible.
            sizes = ((np.arange(selection.shape[0]) * 2)[::-1]) ** 1.5 + 10
            plt.scatter(*selection.T,
                        s=sizes,
                        label=i)
        plt.legend(loc="upper right", bbox_to_anchor=(1.5, 1))
        plt.ylabel(metrics_names[1])
        plt.xlabel(metrics_names[0])
        plt.xlim((-0.05, 1.05))
        plt.ylim((-0.05, 1.05))
        plt.xticks(np.linspace(0, 1, 11))
        plt.yticks(np.linspace(0, 1, 11))
        plt.grid(True)

    def select_pareto_front(self, metrics, maximize=True):
        """Return only the rows of ``metrics`` lying on the Pareto front."""
        pareto_mask = self.pareto_metric_list(metrics, maximise=maximize)
        return metrics[pareto_mask]

class AnomalyMetric(QualityMetric):

class AnomalyMetric(QualityMetric):
def __init__(self,
target,
predicted_labels,
Expand Down Expand Up @@ -617,3 +700,28 @@ def calculate_detection_metric(
target=target,
predicted_labels=labels).metric()
return metric_dict

# Regression metrics keyed by canonical short name (sklearn implementations
# plus the module-level `rmse` helper).
REGRESSION_METRIC_DICT = dict(
    r2=r2_score,
    mse=mean_squared_error,
    rmse=rmse,
    mae=mean_absolute_error,
    msle=mean_squared_log_error,
    mape=mean_absolute_percentage_error,
    median_absolute_error=median_absolute_error,
    explained_variance_score=explained_variance_score,
    max_error=max_error,
    d2_absolute_error_score=d2_absolute_error_score,
)

# Classification metrics keyed by short name.
CLASSIFICATION_METRIC_DICT = dict(
    accuracy=accuracy_score,
    f1=f1_score,
    roc_auc=roc_auc_score,
    precision=precision_score,
    logloss=log_loss,
)

# Forecasting metrics keyed by short name (`smape` and `mase` are local
# helpers defined in this module).
FORECASTING_METRICS_DICT = dict(
    rmse=rmse,
    mae=mean_absolute_error,
    median_absolute_error=median_absolute_error,
    smape=smape,
    mase=mase,
)
6 changes: 4 additions & 2 deletions fedot_ind/core/models/early_tc/ecec.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from typing import Optional
from fedot_ind.core.architecture.settings.computational import backend_methods as np

from fedot.core.operations.operation_parameters import OperationParameters
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot_ind.core.models.early_tc.base_early_tc import BaseETC
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict


class ECEC(BaseETC):
def __init__(self, params: Optional[OperationParameters] = None):
Expand Down
3 changes: 2 additions & 1 deletion fedot_ind/core/models/early_tc/economy_k.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional
from fedot_ind.core.architecture.settings.computational import backend_methods as np

from fedot.core.operations.operation_parameters import OperationParameters
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot_ind.core.models.early_tc.base_early_tc import BaseETC
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
Expand Down
3 changes: 2 additions & 1 deletion fedot_ind/core/models/early_tc/prob_threshold.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional
from fedot_ind.core.architecture.settings.computational import backend_methods as np

from fedot.core.operations.operation_parameters import OperationParameters
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot_ind.core.models.early_tc.base_early_tc import BaseETC

class ProbabilityThresholdClassifier(BaseETC):
Expand Down
5 changes: 3 additions & 2 deletions fedot_ind/core/models/early_tc/teaser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional
from fedot_ind.core.architecture.settings.computational import backend_methods as np

from fedot.core.operations.operation_parameters import OperationParameters
from fedot_ind.core.architecture.settings.computational import backend_methods as np
from fedot_ind.core.models.early_tc.base_early_tc import BaseETC
from sklearn.model_selection import GridSearchCV
from sklearn.svm import OneClassSVM
Expand Down Expand Up @@ -43,7 +44,7 @@ def _form_X_oc(self, predicted_probas):
def _predict(self, X, training=False):
estimator_indices, offset = self._select_estimators(X)
X_ocs, predicted_probas, predicted_labels = zip(
*[self._predict_one_slave(X, i, offset) for i in estimator_indices] # check boundary
*[self._predict_one_slave(X, i, offset) for i in estimator_indices]
)
non_acceptance = self._consecutive_count(predicted_labels) < self.consecutive_predictions
X_ocs = np.stack(X_ocs)
Expand Down
117 changes: 70 additions & 47 deletions fedot_ind/core/models/nn/network_impl/base_nn_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,69 @@ def _prepare_data(self, ts, split_data: bool = True, collate_fn=None):
self.label_encoder = train_dataset.label_encoder
return train_loader, val_loader

def _train_one_batch(self, batch, optimizer, loss_fn):
optimizer.zero_grad()
inputs, targets = batch
output = self.model(inputs)
loss = loss_fn(output, targets.float())
loss.backward()
optimizer.step()
training_loss = loss.data.item() * inputs.size(0)
total = targets.size(0)
correct = (torch.argmax(output, 1) ==
torch.argmax(targets, 1)).sum().item()
return training_loss, total, correct

def _eval_one_batch(self, batch, loss_fn):
inputs, targets = batch
output = self.model(inputs)
loss = loss_fn(output, targets.float())
valid_loss = loss.data.item() * inputs.size(0)
total = targets.size(0)
correct = (torch.argmax(output, 1) ==
torch.argmax(targets, 1)).sum().item()
return valid_loss, total, correct

def _run_one_epoch(self, train_loader, val_loader,
optimizer, loss_fn,
epoch, val_interval,
early_stopping, scheduler,
best_val_loss):
training_loss = 0.0
valid_loss = 0.0
self.model.train()
total = 0
correct = 0
best_model = self.model
for batch in tqdm(train_loader):
training_loss_batch, total_batch, correct_batch = self._train_one_batch(batch, optimizer, loss_fn)
training_loss += training_loss_batch
total += total_batch
correct += correct_batch
accuracy = correct / total
training_loss /= len(train_loader.dataset)
print('Epoch: {}, Accuracy = {}, Training Loss: {:.2f}'.format(
epoch, accuracy, training_loss))

if val_loader is not None and epoch % val_interval == 0:
self.model.eval()
total = 0
correct = 0
for batch in val_loader:
valid_loss_batch, total_batch, correct_batch = self._eval_one_batch(batch, loss_fn)
valid_loss += valid_loss_batch
total += total_batch
correct += correct_batch
if valid_loss < best_val_loss:
best_val_loss = valid_loss
best_model = copy.deepcopy(self.model)

early_stopping(training_loss, self.model, './')
adjust_learning_rate(optimizer, scheduler,
epoch + 1, self.learning_rate, printout=False)
scheduler.step()
return best_model, best_val_loss

def _train_loop(self, train_loader, val_loader, loss_fn, optimizer):
early_stopping = EarlyStopping()
scheduler = lr_scheduler.OneCycleLR(optimizer=optimizer,
Expand All @@ -127,53 +190,13 @@ def _train_loop(self, train_loader, val_loader, loss_fn, optimizer):
self.epochs, self.learning_rate)
loss_prefix = 'RMSE' if self.is_regression_task else 'Accuracy'
for epoch in range(1, self.epochs + 1):
training_loss = 0.0
valid_loss = 0.0
self.model.train()
total = 0
correct = 0
for batch in tqdm(train_loader):
optimizer.zero_grad()
inputs, targets = batch
output = self.model(inputs)
loss = loss_fn(output, targets.float())
loss.backward()
optimizer.step()
training_loss += loss.data.item() / inputs.size(0) if self.is_regression_task \
else loss.data.item() * inputs.size(0)
total += targets.size(0)
correct += (torch.argmax(output, 1) == torch.argmax(targets, 1)).sum().item() \
if not self.is_regression_task else 0

training_loss = training_loss / len(train_loader.dataset) if not self.is_regression_task else training_loss
accuracy = correct / total if not self.is_regression_task else training_loss
print('Epoch: {}, {}= {}, Training Loss: {:.2f}'.format(
epoch, loss_prefix, accuracy, training_loss))

if val_loader is not None and epoch % val_interval == 0:
self.model.eval()
total = 0
correct = 0
for batch in val_loader:
inputs, targets = batch
output = self.model(inputs)

loss = loss_fn(output, targets.float())

valid_loss += loss.data.item() / inputs.size(0) if self.is_regression_task \
else loss.data.item() * inputs.size(0)
total += targets.size(0)
correct += (torch.argmax(output, 1) == torch.argmax(targets, 1)).sum().item() \
if not self.is_regression_task else 0
if valid_loss < best_val_loss:
best_val_loss = valid_loss
best_model = copy.deepcopy(self.model)

early_stopping(training_loss, self.model, './')
adjust_learning_rate(optimizer, scheduler,
epoch + 1, self.learning_rate, printout=False)
scheduler.step()

best_model, best_val_loss = self._run_one_epoch(
train_loader, val_loader,
optimizer, loss_fn,
epoch, val_interval,
early_stopping, scheduler,
best_val_loss
)
if early_stopping.early_stop:
print("Early stopping")
break
Expand Down
Loading

0 comments on commit 49d0570

Please sign in to comment.