Commit

Automated autopep8 fixes
autopep8 bot committed Jul 16, 2024
1 parent 81b2a67 commit fa80269
Showing 12 changed files with 262 additions and 248 deletions.
62 changes: 33 additions & 29 deletions fedot_ind/core/metrics/interval_metrics.py
@@ -1,17 +1,18 @@
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
from fedot.core.data.data import InputData, OutputData
from typing import Union, Literal


def conf_matrix(actual, predicted):
    cm = confusion_matrix(actual, predicted)
    return dict(TN=cm[0, 0], FP=cm[0, 1], FN=cm[1, 0], TP=cm[1, 1])
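A quick illustrative call of the confusion-matrix helper above (labels invented; a sketch assuming the `cm[1, 1]` indexing for TP):

```python
from sklearn.metrics import confusion_matrix

actual = [0, 0, 1, 1, 1, 0]
predicted = [0, 1, 1, 1, 0, 0]
cm = confusion_matrix(actual, predicted)
# same mapping as conf_matrix: TN=2, FP=1, FN=1, TP=2
print(dict(TN=cm[0, 0], FP=cm[0, 1], FN=cm[1, 0], TP=cm[1, 1]))
```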


def average_delay(boundaries, prediction,
                  point,
                  use_idx=True,
                  window_placement='lefter'):
    cp_confusion = extract_cp_cm(boundaries, prediction, use_idx=use_idx, use_switch_point=False)
    # statistics
    statistics = {
@@ -29,33 +30,35 @@ def average_delay(boundaries, prediction,
        i: time_func(triplet) for i, triplet in cp_confusion['TPs'].items()
    }
    return detection_history, statistics



def tp_transform(tps):
    return np.diff(tps[[1, 0]], axis=0) / np.diff(tps[[-1, 0]], axis=0)


def extract_cp_cm(boundaries: Union[np.array, pd.DataFrame],
                  prediction: pd.DataFrame,
                  use_switch_point: bool = True,  # if first anomaly dot is considered as changepoint
                  use_idx: bool = False):
    if isinstance(boundaries, pd.DataFrame):
        boundaries = boundaries.values.T
    anomaly_tsp = prediction[prediction == 1].sort_index().index
    TPs, FNs, FPs = {}, [], []

    if boundaries.shape[1]:

        FPs += [anomaly_tsp[anomaly_tsp < boundaries[0, 0]]]  # left rest
        for i, (b_low, b_up) in enumerate(boundaries):
            all_tsp_in_window = prediction[b_low: b_up].index
            anomaly_tsp_in_window = all_tsp_in_window & anomaly_tsp
            if not len(anomaly_tsp_in_window):  # why not false positive? do we expect an anomaly to be in every interval?
                FNs.append(i if use_idx else all_tsp_in_window)
            TPs[i] = [b_low,
                      anomaly_tsp_in_window[int(use_switch_point)] if use_idx else anomaly_tsp_in_window,
                      b_up]
            if not use_idx:
                FNs.append(all_tsp_in_window - anomaly_tsp_in_window)
        FPs.append(anomaly_tsp[anomaly_tsp > boundaries[-1, -1]])  # right rest
    else:
        FPs.append(anomaly_tsp)

@@ -69,9 +72,11 @@ def extract_cp_cm(boundaries: Union[np.array, pd.DataFrame],
)


# cognate of single_detecting_boundaries
def get_boundaries(idx, actual_timestamps, window_size: int = None,
                   window_placement: Literal['left', 'right', 'central'] = 'left',
                   intersection_mode: Literal['uniform', 'shift_to_left', 'shift_to_right'] = 'shift_to_left',
                   ):
    # idx = idx
    # cast everything to pandas objects for convenience later on
@@ -82,30 +87,30 @@ def get_boundaries(idx, actual_timestamps, window_size: int = None,
        else:
            idx = pd.Series(idx)
            td = window_size
    else:
        raise TypeError('Unexpected type of ts index')

    boundaries = np.tile(actual_timestamps, (2, 1))
    # [0, ...] - lower bound, [1, ...] - upper
    if window_placement == 'left':
        boundaries[0] -= td
    elif window_placement == 'central':
        boundaries[0] -= td / 2
        boundaries[1] += td / 2
    elif window_placement == 'right':
        boundaries[1] += td
    else:
        raise ValueError('Unknown mode')

    if not len(actual_timestamps):
        return boundaries

    # intersection resolution
    for i in range(len(actual_timestamps) - 1):
        if boundaries[0, i + 1] > boundaries[1, i]:  # no overlap with the previous window
            continue

        if intersection_mode == 'shift_to_left':
            boundaries[0, i + 1] = boundaries[1, i]
        elif intersection_mode == 'shift_to_right':
            boundaries[1, i] = boundaries[0, i + 1]
@@ -120,6 +125,7 @@ def get_boundaries(idx, actual_timestamps, window_size: int = None,
    boundaries = pd.DataFrame({'lower': boundaries[0], 'upper': boundaries[1]})
    return boundaries
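To see what the boundary construction does, here is a self-contained sketch of the visible core of get_boundaries (left window placement plus 'shift_to_left' overlap resolution) on invented integer timestamps:

```python
import numpy as np

actual_timestamps = np.array([20, 24, 80])  # invented changepoints
td = 5                                      # window size

boundaries = np.tile(actual_timestamps, (2, 1))  # row 0 - lower bound, row 1 - upper
boundaries[0] -= td                              # 'left' placement
for i in range(len(actual_timestamps) - 1):
    if boundaries[0, i + 1] > boundaries[1, i]:  # no overlap with the previous window
        continue
    boundaries[0, i + 1] = boundaries[1, i]      # 'shift_to_left' resolution
print(boundaries)
# [[15 20 75]
#  [20 24 80]]
```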


def nab(boundaries, predictions, mode='standard', custom_coefs=None):
    inner_coefs = {
        'low_FP': [1.0, -0.11, -1.0],
@@ -128,11 +134,9 @@ def nab(boundaries, predictions, mode='standard', custom_coefs=None):
    }
    coefs = custom_coefs or inner_coefs[mode]
    confusion_matrix = extract_cp_cm(boundaries, predictions)

    tps = confusion_matrix['tps']

    score = np.inner([tps, len(confusion_matrix['FP']), len(confusion_matrix['FN'])],
                     coefs)
    return score
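The score itself is an inner product of the detection counts with the mode coefficients; a standalone sketch with invented counts and the visible 'low_FP' profile (the other coefficient rows are collapsed above):

```python
import numpy as np

coefs = [1.0, -0.11, -1.0]     # 'low_FP': reward TPs, lightly penalise FPs, penalise FNs
tp_term, n_fp, n_fn = 3, 2, 1  # invented detection counts
score = np.inner([tp_term, n_fp, n_fn], coefs)
print(score)                   # 3*1.0 + 2*(-0.11) + 1*(-1.0) ≈ 1.78
```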


55 changes: 29 additions & 26 deletions fedot_ind/core/metrics/metrics_implementation.py
@@ -221,6 +221,7 @@ def smape(a, f, _=None):
    return 1 / len(a) * np.sum(2 * np.abs(f - a) /
                               (np.abs(a) + np.abs(f)) * 100)


def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))
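A tiny numeric check of the two error measures above (arrays invented):

```python
import numpy as np
from sklearn.metrics import mean_squared_error

a = np.array([100.0, 200.0])  # actuals
f = np.array([110.0, 180.0])  # forecasts
smape_val = 1 / len(a) * np.sum(2 * np.abs(f - a) / (np.abs(a) + np.abs(f)) * 100)
rmse_val = np.sqrt(mean_squared_error(a, f))
print(round(smape_val, 3), round(rmse_val, 3))  # 10.025 15.811
```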

@@ -343,7 +344,8 @@ def kl_divergence(solution: pd.DataFrame,
        return np.average(solution.sum(axis=1), weights=sample_weights)
    else:
        return np.average(solution.mean())



class ETSCPareto(QualityMetric, ParetoMetrics):
    def __init__(self,
                 target,
@@ -387,7 +389,7 @@ def metric(self) -> float:
            for i, metric in enumerate(self.metric_list, 1):
                assert metric in CLASSIFICATION_METRIC_DICT, f'{metric} is not found in available metrics'
                metric_value = CLASSIFICATION_METRIC_DICT[metric](self.target[mask[est]],
                                                                  self.predicted_labels[est][mask[est]])
                result[est, i] = metric_value

        if self.weights is None:
@@ -399,13 +401,13 @@ def metric(self) -> float:
        else:
            assert self.weights.shape[-1] == self.metrics.shape[-1], 'Metrics and weights size mismatch!'
            self.weights /= self.weights.sum()

        result = result @ self.weights.T
        if not self.reduce:
            return pd.DataFrame(result, columns=self.columns)
        else:
            return result

    def plot_bicrit_metric(self, metrics, select=None, metrics_names=None):
        if not metrics_names:
            metrics_names = ('Robustness', 'Accuracy')
@@ -414,8 +416,8 @@ def plot_bicrit_metric(self, metrics, select=None, metrics_names=None):
        for i, metric in enumerate(metrics):
            selection = metric[select]
            sizes = ((np.arange(selection.shape[0]) * 2)[::-1]) ** 1.5 + 10
            plt.scatter(*(metric[select]).T,
                        s=sizes,
                        label=i)
        plt.legend(loc="upper right", bbox_to_anchor=(1.5, 1))
        plt.ylabel(metrics_names[1])
@@ -425,7 +427,7 @@ def plot_bicrit_metric(self, metrics, select=None, metrics_names=None):
        plt.xticks(np.linspace(0, 1, 11))
        plt.yticks(np.linspace(0, 1, 11))
        plt.grid(True)

    def select_pareto_front(self, metrics, maximize=True):
        pareto_mask = self.pareto_metric_list(metrics, maximise=maximize)
        return metrics[pareto_mask]
@@ -701,27 +703,28 @@ def calculate_detection_metric(
        predicted_labels=labels).metric()
    return metric_dict


REGRESSION_METRIC_DICT = {'r2': r2_score,
                          'mse': mean_squared_error,
                          'rmse': rmse,
                          'mae': mean_absolute_error,
                          'msle': mean_squared_log_error,
                          'mape': mean_absolute_percentage_error,
                          'median_absolute_error': median_absolute_error,
                          'explained_variance_score': explained_variance_score,
                          'max_error': max_error,
                          'd2_absolute_error_score': d2_absolute_error_score}

CLASSIFICATION_METRIC_DICT = {'accuracy': accuracy_score,
                              'f1': f1_score,
                              'roc_auc': roc_auc_score,
                              'precision': precision_score,
                              'logloss': log_loss}

FORECASTING_METRICS_DICT = {
    'rmse': rmse,
    'mae': mean_absolute_error,
    'median_absolute_error': median_absolute_error,
    'smape': smape,
    'mase': mase
}
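These dictionaries let callers dispatch a metric by name; a minimal standalone sketch of the same pattern (a local stand-in for REGRESSION_METRIC_DICT, with invented data):

```python
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def rmse(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))

metric_dict = {'rmse': rmse, 'mae': mean_absolute_error, 'r2': r2_score}
y_true = np.array([3.0, 5.0, 2.5, 7.0])
y_pred = np.array([2.8, 5.4, 2.9, 6.6])
for name, fn in metric_dict.items():
    print(name, round(float(fn(y_true, y_pred)), 4))
```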
36 changes: 19 additions & 17 deletions fedot_ind/core/models/early_tc/base_early_tc.py
@@ -7,9 +7,9 @@


class BaseETC(ClassifierMixin, BaseEstimator):
    def __init__(self, params: Optional[OperationParameters] = None):
        if params is None:
            params = {}
        super().__init__()
        self.interval_percentage = params.get('interval_percentage', 10)
        self.consecutive_predictions = params.get('consecutive_predictions', 1)
@@ -26,7 +26,9 @@ def _init_model(self, X, y):
        max_data_length = X.shape[-1]
        self.prediction_idx = self._compute_prediction_points(max_data_length)
        self.n_pred = len(self.prediction_idx)
        self.slave_estimators = [
            WEASEL(random_state=self.random_state, support_probabilities=True, **self.weasel_params)
            for _ in range(self.n_pred)]
        self.scalers = [StandardScaler() for _ in range(self.n_pred)]
        self._chosen_estimator_idx = -1
        self.classes_ = [np.unique(y)]
@@ -37,7 +39,7 @@ def required_length(self):
        if not hasattr(self, '_chosen_estimator_idx'):
            return None
        return self.prediction_idx[self._chosen_estimator_idx]

    @property
    def n_classes(self):
        return len(self.classes_[0])
@@ -50,23 +52,23 @@ def fit(self, X, y=None):
            self._fit_one_interval(X, y, i)

    def _fit_one_interval(self, X, y, i):
        X_part = X[..., :self.prediction_idx[i] + 1]
        X_part = self.scalers[i].fit_transform(X_part)
        probas = self.slave_estimators[i].fit_predict_proba(X_part, y)
        return probas

    def _predict_one_slave(self, X, i, offset=0):
        X_part = X[..., max(0, offset - 1):self.prediction_idx[i] + 1]
        X_part = self.scalers[i].transform(X_part)
        probas = self.slave_estimators[i].predict_proba(X_part)
        return probas, np.argmax(probas, axis=-1)

    def _compute_prediction_points(self, n_idx):
        interval_length = max(int(n_idx * self.interval_percentage / 100), self.min_ts_length)
        prediction_idx = np.arange(n_idx - 1, -1, -interval_length)[::-1][1:]
        self.earliness = 1 - prediction_idx / n_idx  # divide by n_idx, otherwise the last harmonic-mean score is always 0
        return prediction_idx
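A worked example of the schedule computed above (assumed values interval_percentage=10, min_ts_length=3 and a series of length 50; purely illustrative):

```python
import numpy as np

n_idx, interval_percentage, min_ts_length = 50, 10, 3
interval_length = max(int(n_idx * interval_percentage / 100), min_ts_length)  # -> 5
prediction_idx = np.arange(n_idx - 1, -1, -interval_length)[::-1][1:]
print(prediction_idx)              # [ 9 14 19 24 29 34 39 44 49]
print(1 - prediction_idx / n_idx)  # earliness: [0.82 0.72 0.62 ... 0.12 0.02]
```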

    def _select_estimators(self, X, training=False):
        offset = 0
        if not training and self.prediction_mode == 'best_by_harmonic_mean':
@@ -80,15 +82,15 @@ def _select_estimators(self, X, training=False):
        else:
            raise ValueError('Unknown prediction mode')
        return estimator_indices, offset

    def _predict(self, X, training=True):
        estimator_indices, offset = self._select_estimators(X, training)
        if not training:
            self._estimator_for_predict = estimator_indices
        prediction = (np.stack(array_list) for array_list in zip(
            *[self._predict_one_slave(X, i, offset) for i in estimator_indices]  # check boundary
        ))
        return prediction  # see the output in _predict_one_slave

    def _consecutive_count(self, predicted_labels: List[np.array]):
        n = len(predicted_labels[0])
@@ -97,18 +99,18 @@ def _consecutive_count(self, predicted_labels: List[np.array]):
        for i in range(1, prediction_points):
            equal = predicted_labels[i - 1] == predicted_labels[i]
            consecutive_labels[i, equal] = consecutive_labels[i - 1, equal] + 1
        return consecutive_labels  # prediction_points x n_instances
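A worked illustration of the consecutive-agreement count (label history invented; the ones-initialisation is an assumption, since the setup lines are collapsed above):

```python
import numpy as np

predicted_labels = [np.array([0, 1, 1, 0]),   # labels at three prediction points, four instances
                    np.array([0, 1, 0, 0]),
                    np.array([0, 1, 0, 1])]
prediction_points, n = len(predicted_labels), len(predicted_labels[0])
consecutive_labels = np.ones((prediction_points, n))  # assumed: every instance starts with a run of 1
for i in range(1, prediction_points):
    equal = predicted_labels[i - 1] == predicted_labels[i]
    consecutive_labels[i, equal] = consecutive_labels[i - 1, equal] + 1
print(consecutive_labels)
# [[1. 1. 1. 1.]
#  [2. 2. 1. 2.]
#  [3. 3. 2. 1.]]
```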

    def predict_proba(self, *args):
        predicted_probas, scores, *_ = args
        if self.transform_score:
            scores = self._transform_score(scores)
        scores = np.tile(scores[..., None], (1, 1, self.n_classes))
        prediction = np.stack([predicted_probas, scores], axis=0)
        if prediction.shape[1] == 1:
            prediction = prediction.squeeze(1)
        return prediction

    def predict(self, X):
        prediction = self.predict_proba(X)
        labels = prediction[0:1].argmax(-1)

0 comments on commit fa80269
