
Commit

cleaned up some unnecessary junk
technocreep committed Dec 21, 2023
1 parent 12d6363 commit 4b539d8
Showing 6 changed files with 77 additions and 231 deletions.
67 changes: 67 additions & 0 deletions examples/ensemble/kernel_ensemble_example.py
@@ -0,0 +1,67 @@
from fedot import Fedot

from fedot_ind.core.ensemble.kernel_ensemble import init_kernel_ensemble
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble
from fedot_ind.tools.loader import DataLoader

n_best = 3
feature_dict = {}
metric_list = []
proba_dict = {}
metric_dict = {}
dataset_name = 'Lightning2'
kernel_list = {'wavelet': [
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "mexh",
'n_components': 2
}},
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "morl",
'n_components': 2
}}],
'quantile': [
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': True,
'window_size': 25
}
},
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': False,
'window_size': 40
}
}]
}
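# Build one display name per generator: the family key joined with the full
# parameter dict (this is exactly what the f-string below produces).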
fg_names = []
for key in kernel_list:
for model_params in kernel_list[key]:
fg_names.append(f'{key}_{model_params}')

train_data, test_data = DataLoader(dataset_name).load_data()
set_of_fg, train_feats, train_target, test_feats, test_target = init_kernel_ensemble(train_data,
test_data,
kernel_list=kernel_list)

n_best_generators = set_of_fg.T.nlargest(n_best, 0).index
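# Assumption: set_of_fg holds one quality score per feature generator, so the
# indices selected above are the n_best top-scoring generators used below.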
for rank in range(n_best):
fg_rank = n_best_generators[rank]
train_best = train_feats[fg_rank]
test_best = test_feats[fg_rank]
    feature_dict.update({fg_names[rank]: (train_best, test_best)})

for model_name, feature in feature_dict.items():
industrial = Fedot(metric='roc_auc', timeout=5, problem='classification', n_jobs=6)

model = industrial.fit(feature[0], train_target)
labels = industrial.predict(feature[1])
proba_dict.update({model_name: industrial.predict_proba(feature[1])})
metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'accuracy'])})
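# RankEnsemble consumes the per-model probabilities and metrics keyed first by
# dataset name and then by model name, as assembled in the loops above.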
rank_ensembler = RankEnsemble(dataset_name=dataset_name,
proba_dict={dataset_name: proba_dict},
metric_dict={dataset_name: metric_dict})

ensemble_result = rank_ensembler.ensemble()
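
# A minimal reporting sketch, illustrative only (assumes metric_dict and
# ensemble_result, built above, are plain dict-like objects):
for model_name, metrics in metric_dict.items():
    print(f'{model_name}: {metrics}')
print(f'rank ensemble: {ensemble_result}')
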
67 changes: 0 additions & 67 deletions fedot_ind/core/ensemble/kernel_ensemble.py
@@ -1,15 +1,12 @@
import numpy as np
import pandas as pd
from fedot.api.main import Fedot
from MKLpy.algorithms import FHeuristic, RMKL
from MKLpy.callbacks import EarlyStopping
from MKLpy.scheduler import ReduceOnWorsening
from scipy.spatial.distance import pdist, squareform

from fedot_ind.core.architecture.pipelines.classification import ClassificationPipelines
from fedot_ind.core.architecture.settings.pipeline_factory import KernelFeatureGenerator
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble
from fedot_ind.tools.loader import DataLoader


class KernelEnsembler(ClassificationPipelines):
@@ -99,67 +96,3 @@ def init_kernel_ensemble(train_data,
test_target = kernels.test_target

return set_of_fg, train_feats, train_target, test_feats, test_target
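    # Return order, as consumed by the example script above: generator score
    # table, train feature dict, train target, test feature dict, test target.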


if __name__ == '__main__':
n_best = 3
feature_dict = {}
metric_list = []
proba_dict = {}
metric_dict = {}
dataset_name = 'Lightning2'
kernel_list = {'wavelet': [
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "mexh",
'n_components': 2
}},
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "morl",
'n_components': 2
}}],
'quantile': [
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': True,
'window_size': 25
}
},
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': False,
'window_size': 40
}
}]
}
fg_names = []
for key in kernel_list:
for model_params in kernel_list[key]:
fg_names.append(f'{key}_{model_params}')

train_data, test_data = DataLoader(dataset_name).load_data()
set_of_fg, train_feats, train_target, test_feats, test_target = init_kernel_ensemble(train_data,
test_data,
kernel_list=kernel_list)

n_best_generators = set_of_fg.T.nlargest(n_best, 0).index
for rank in range(n_best):
fg_rank = n_best_generators[rank]
train_best = train_feats[fg_rank]
test_best = test_feats[fg_rank]
        feature_dict.update({fg_names[rank]: (train_best, test_best)})

for model_name, feature in feature_dict.items():
industrial = Fedot(metric='roc_auc', timeout=5, problem='classification', n_jobs=6)

model = industrial.fit(feature[0], train_target)
labels = industrial.predict(feature[1])
proba_dict.update({model_name: industrial.predict_proba(feature[1])})
metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'accuracy'])})
rank_ensembler = RankEnsemble(dataset_name=dataset_name,
proba_dict={dataset_name: proba_dict},
metric_dict={dataset_name: metric_dict})

ensemble_result = rank_ensembler.ensemble()
_ = 1
@@ -103,54 +103,3 @@ def get_random_sparse_matrix(size: tuple):
if np.random.rand() < 0.1:
matrix[i, j] = np.random.rand()
return matrix


if __name__ == '__main__':
from fedot_ind.tools.loader import DataLoader

arr = np.array([[1, 1, 1, 0, 0],
[3, 3, 3, 0, 0],
[4, 4, 4, 0, 0],
[5, 5, 5, 0, 0],
[0, 0, 0, 4, 4],
[0, 0, 0, 5, 5],
[0, 0, 0, 2, 2]])
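
    # A toy block-structured matrix; it is not used by the demo below.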

(X_train, y_train), (X_test, y_test) = DataLoader('Lightning7').load_data()

# init_ts = train[0].iloc[0, :].values
# scaler = MinMaxScaler()
# scaler.fit(init_ts.reshape(-1, 1))
# single_ts = scaler.transform(init_ts.reshape(-1, 1)).reshape(-1)

cur = CURDecomposition(rank=20)
# M = cur.ts_to_matrix(single_ts, 30)
C, U, R = cur.fit_transform(X_train)
basis = cur.reconstruct_basis(C, U, R, X_train.shape[1])
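
    # Background: CUR approximates X ~= C @ U @ R, where C and R are actual
    # columns and rows sampled from X, which keeps the factors interpretable
    # (unlike SVD's dense singular vectors).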

# rec_ts = cur.matrix_to_ts(C @ U @ R)
# err = np.linalg.norm(single_ts - rec_ts)

# plt.plot(init_ts, label='init_ts')
# plt.plot(scaler.inverse_transform(rec_ts.reshape(-1, 1)), label='rec_ts')
# plt.legend()
# plt.show()
_ = 1

# ranks = list(range(5, 20))
# cur_errors = []
# with tqdm(total=len(ranks), desc='cur') as pbar:
# for rank in ranks:
# cur = CURDecomposition(rank=rank)
# C, U, R = cur.fit_transform(M)
# cur_errors.append(np.linalg.norm(M - C @ U @ R))
# pbar.update(1)

# f,a = plt.subplots(2, 1, figsize=(10, 10))
# # a[0].plot(ranks, svd_errors, label='svd')
# a[1].plot(ranks, cur_errors, label='cur')
# a[0].set_title('svd')
# a[1].set_title('cur')
# plt.legend()
# plt.show()
_ = 1
39 changes: 0 additions & 39 deletions fedot_ind/core/operation/transformation/splitter.py
@@ -212,8 +212,6 @@ def balance_with_non_anomaly(self, series, target, features, non_anomaly_intervals
ts = series.copy()
counter = 0
taken_slots = pd.Series([0 for _ in range(len(ts))])
# for non_anom in non_anomaly_intervals:
# taken_slots[non_anom[0]:non_anom[1]] = 0

while len(non_anomaly_ts_list) != number_of_anomalies and counter != number_of_anomalies * 100:
seed = np.random.randint(1000)
@@ -285,40 +283,3 @@ def _transform_test(self, series: np.array):
transformed_data.append(series_part)
transformed_data = np.stack(transformed_data)
return transformed_data


if __name__ == '__main__':
uni_ts = np.random.rand(800)
anomaly_d_uni = {'anomaly1': [[40, 50], [60, 80], [200, 220], [410, 420], [513, 524], [641, 645]],
'anomaly2': [[130, 170], [300, 320], [400, 410], [589, 620], [715, 720]],
'anomaly3': [[500, 530], [710, 740]],
'anomaly4': [[77, 90], [98, 112], [145, 158], [290, 322]]}

ts1 = np.arange(0, 100)
multi_ts = np.array([ts1, ts1 * 2, ts1 * 3]).T
anomaly_d_multi = {'anomaly1': [[0, 5], [15, 20], [22, 24], [55, 63], [70, 90]],
'anomaly2': [[10, 12], [15, 16], [27, 31], [44, 50], [98, 100]],
'anomaly3': [[0, 3], [15, 18], [19, 24], [55, 60], [85, 90]]}
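
    # Format note (inferred from both dicts above): each key is a class label
    # mapped to a list of [start, stop] index intervals marking anomaly windows.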

splitter_multi = TSTransformer()
train_multi, test_multi = splitter_multi.transform_for_fit(series=multi_ts,
anomaly_dict=anomaly_d_multi, plot=False, binarize=True)

splitter_uni = TSTransformer()
train_uni, test_uni = splitter_uni.transform_for_fit(series=uni_ts,
anomaly_dict=anomaly_d_uni, plot=True, binarize=True)

unique_ts = np.random.rand(800)
anomaly_unique = {
        'class1': [[0, 10], [20, 30], [50, 60], [70, 80], [100, 110], [120, 130], [160, 170], [200, 210],
                   [310, 330], [350, 370], [410, 430], [460, 480], [500, 520], [540, 560], [590, 610],
                   [630, 650], [680, 700], [720, 740], [760, 780], [80, 100], [320, 340]],
'class2': [[0, 20], [50, 70], [100, 120], [140, 160], [190, 210], [230, 250], [270, 290], [240, 250],
[270, 280], [330, 340], [360, 370], [400, 410], [440, 450], [480, 490], [520, 530], [570, 580],
[610, 620], [660, 670], [700, 710]]}

# splitter_unique = TSTransformer(strategy='unique')
# unique_cls, unique_train, unique_test = splitter_unique.transform_for_fit(series=unique_ts,
# anomaly_dict=anomaly_unique, plot=True,
# binarize=False)
_ = 1
47 changes: 1 addition & 46 deletions fedot_ind/tools/explain/distances.py
@@ -1,9 +1,7 @@
import numpy as np
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import cramervonmises
from scipy.stats import energy_distance
from scipy.stats import entropy
from scipy.stats import ks_2samp


def kl_divergence(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
@@ -45,34 +43,6 @@ def total_variation_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
return 0.5 * np.sum(np.abs(probs_before - probs_after))
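
# Worked example: for p = (0.5, 0.5) and q = (0.9, 0.1),
# TV(p, q) = 0.5 * (|0.5 - 0.9| + |0.5 - 0.1|) = 0.5 * 0.8 = 0.4.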


def cramer_von_mises_statistic(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
The Cramer-von Mises statistic tests the goodness-of-fit of two samples, measuring the
similarity of their distributions.
Args:
probs_before: The probability distribution before some event.
probs_after: The probability distribution after the same event.
"""
_, p_value = cramervonmises(probs_before, cdf='uniform')
return p_value


def kolmogorov_smirnov_statistic(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
The Kolmogorov-Smirnov statistic tests the equality of two samples, measuring the maximum
difference between their empirical cumulative distribution functions.
Args:
probs_before: The probability distribution before some event.
probs_after: The probability distribution after the same event.
"""
_, p_value = ks_2samp(probs_before, probs_after)
return p_value


def energy_distance_measure(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
Energy Distance measures the distance between the characteristic functions of two distributions.
@@ -97,18 +67,6 @@ def hellinger_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
return np.sqrt(np.sum((np.sqrt(probs_before) - np.sqrt(probs_after)) ** 2)) / np.sqrt(2)
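
# Worked example: for disjoint p = (1, 0) and q = (0, 1),
# H(p, q) = sqrt((1 - 0)^2 + (0 - 1)^2) / sqrt(2) = sqrt(2) / sqrt(2) = 1, the maximum.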


def bhattacharyya_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
Bhattacharyya Distance measures the similarity between two probability distributions.
Args:
probs_before: The probability distribution before some event.
probs_after: The probability distribution after the same event.
"""
return -np.log(np.sum(np.sqrt(probs_before * probs_after)))


def cosine_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
Cosine Distance measures the cosine of the angle between two vectors, indicating their similarity.
@@ -145,11 +103,8 @@ def rmse(p, q):
cosine=cosine_distance,
euclidean=euclidean_distance,
hellinger=hellinger_distance,
# bhattacharyya=bhattacharyya_distance,
energy=energy_distance_measure,
# kolmogorov=kolmogorov_smirnov_statistic,
# cramer=cramer_von_mises_statistic,
# total_variation=total_variation_distance,
total_variation=total_variation_distance,
jensen_shannon=jensen_shannon_divergence,
kl_div=kl_divergence,
cross_entropy=cross_entropy,