
Commit

cleaned up some unnecessary junk
technocreep committed Dec 21, 2023
1 parent 12d6363 commit 4b539d8
Showing 6 changed files with 77 additions and 231 deletions.
67 changes: 67 additions & 0 deletions examples/ensemble/kernel_ensemble_example.py
@@ -0,0 +1,67 @@
from fedot import Fedot

from fedot_ind.core.ensemble.kernel_ensemble import init_kernel_ensemble
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble
from fedot_ind.tools.loader import DataLoader

n_best = 3
feature_dict = {}
metric_list = []
proba_dict = {}
metric_dict = {}
dataset_name = 'Lightning2'
kernel_list = {'wavelet': [
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "mexh",
'n_components': 2
}},
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "morl",
'n_components': 2
}}],
'quantile': [
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': True,
'window_size': 25
}
},
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': False,
'window_size': 40
}
}]
}
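# Build one display name per generator: the family key joined with the full
# parameter dict (this is exactly what the f-string below produces).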
fg_names = []
for key in kernel_list:
for model_params in kernel_list[key]:
fg_names.append(f'{key}_{model_params}')

train_data, test_data = DataLoader(dataset_name).load_data()
set_of_fg, train_feats, train_target, test_feats, test_target = init_kernel_ensemble(train_data,
test_data,
kernel_list=kernel_list)

n_best_generators = set_of_fg.T.nlargest(n_best, 0).index
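# Assumption: set_of_fg holds one quality score per feature generator, so the
# indices selected above are the n_best top-scoring generators used below.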
for rank in range(n_best):
fg_rank = n_best_generators[rank]
train_best = train_feats[fg_rank]
test_best = test_feats[fg_rank]
    feature_dict.update({fg_names[rank]: (train_best, test_best)})

for model_name, feature in feature_dict.items():
industrial = Fedot(metric='roc_auc', timeout=5, problem='classification', n_jobs=6)

model = industrial.fit(feature[0], train_target)
labels = industrial.predict(feature[1])
proba_dict.update({model_name: industrial.predict_proba(feature[1])})
metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'accuracy'])})
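# RankEnsemble consumes the per-model probabilities and metrics keyed first by
# dataset name and then by model name, as assembled in the loops above.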
rank_ensembler = RankEnsemble(dataset_name=dataset_name,
proba_dict={dataset_name: proba_dict},
metric_dict={dataset_name: metric_dict})

ensemble_result = rank_ensembler.ensemble()
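
# A minimal reporting sketch, illustrative only (assumes metric_dict and
# ensemble_result, built above, are plain dict-like objects):
for model_name, metrics in metric_dict.items():
    print(f'{model_name}: {metrics}')
print(f'rank ensemble: {ensemble_result}')
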
67 changes: 0 additions & 67 deletions fedot_ind/core/ensemble/kernel_ensemble.py
@@ -1,15 +1,12 @@
import numpy as np
import pandas as pd
from fedot.api.main import Fedot
from MKLpy.algorithms import FHeuristic, RMKL
from MKLpy.callbacks import EarlyStopping
from MKLpy.scheduler import ReduceOnWorsening
from scipy.spatial.distance import pdist, squareform

from fedot_ind.core.architecture.pipelines.classification import ClassificationPipelines
from fedot_ind.core.architecture.settings.pipeline_factory import KernelFeatureGenerator
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble
from fedot_ind.tools.loader import DataLoader


class KernelEnsembler(ClassificationPipelines):
@@ -99,67 +96,3 @@ def init_kernel_ensemble(train_data,
test_target = kernels.test_target

return set_of_fg, train_feats, train_target, test_feats, test_target
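    # Return order, as consumed by the example script above: generator score
    # table, train feature dict, train target, test feature dict, test target.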


if __name__ == '__main__':
n_best = 3
feature_dict = {}
metric_list = []
proba_dict = {}
metric_dict = {}
dataset_name = 'Lightning2'
kernel_list = {'wavelet': [
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "mexh",
'n_components': 2
}},
{'feature_generator_type': 'signal',
'feature_hyperparams': {
'wavelet': "morl",
'n_components': 2
}}],
'quantile': [
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': True,
'window_size': 25
}
},
{'feature_generator_type': 'quantile',
'feature_hyperparams': {
'window_mode': False,
'window_size': 40
}
}]
}
fg_names = []
for key in kernel_list:
for model_params in kernel_list[key]:
fg_names.append(f'{key}_{model_params}')

train_data, test_data = DataLoader(dataset_name).load_data()
set_of_fg, train_feats, train_target, test_feats, test_target = init_kernel_ensemble(train_data,
test_data,
kernel_list=kernel_list)

n_best_generators = set_of_fg.T.nlargest(n_best, 0).index
for rank in range(n_best):
fg_rank = n_best_generators[rank]
train_best = train_feats[fg_rank]
test_best = test_feats[fg_rank]
        feature_dict.update({fg_names[rank]: (train_best, test_best)})

for model_name, feature in feature_dict.items():
industrial = Fedot(metric='roc_auc', timeout=5, problem='classification', n_jobs=6)

model = industrial.fit(feature[0], train_target)
labels = industrial.predict(feature[1])
proba_dict.update({model_name: industrial.predict_proba(feature[1])})
metric_dict.update({model_name: industrial.get_metrics(test_target, metric_names=['roc_auc', 'f1', 'accuracy'])})
rank_ensembler = RankEnsemble(dataset_name=dataset_name,
proba_dict={dataset_name: proba_dict},
metric_dict={dataset_name: metric_dict})

ensemble_result = rank_ensembler.ensemble()
_ = 1
@@ -103,54 +103,3 @@ def get_random_sparse_matrix(size: tuple):
if np.random.rand() < 0.1:
matrix[i, j] = np.random.rand()
return matrix


if __name__ == '__main__':
from fedot_ind.tools.loader import DataLoader

arr = np.array([[1, 1, 1, 0, 0],
[3, 3, 3, 0, 0],
[4, 4, 4, 0, 0],
[5, 5, 5, 0, 0],
[0, 0, 0, 4, 4],
[0, 0, 0, 5, 5],
[0, 0, 0, 2, 2]])
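
    # A toy block-structured matrix; it is not used by the demo below.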

(X_train, y_train), (X_test, y_test) = DataLoader('Lightning7').load_data()

# init_ts = train[0].iloc[0, :].values
# scaler = MinMaxScaler()
# scaler.fit(init_ts.reshape(-1, 1))
# single_ts = scaler.transform(init_ts.reshape(-1, 1)).reshape(-1)

cur = CURDecomposition(rank=20)
# M = cur.ts_to_matrix(single_ts, 30)
C, U, R = cur.fit_transform(X_train)
basis = cur.reconstruct_basis(C, U, R, X_train.shape[1])
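
    # Background: CUR approximates X ~= C @ U @ R, where C and R are actual
    # columns and rows sampled from X, which keeps the factors interpretable
    # (unlike SVD's dense singular vectors).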

# rec_ts = cur.matrix_to_ts(C @ U @ R)
# err = np.linalg.norm(single_ts - rec_ts)

# plt.plot(init_ts, label='init_ts')
# plt.plot(scaler.inverse_transform(rec_ts.reshape(-1, 1)), label='rec_ts')
# plt.legend()
# plt.show()
_ = 1

# ranks = list(range(5, 20))
# cur_errors = []
# with tqdm(total=len(ranks), desc='cur') as pbar:
# for rank in ranks:
# cur = CURDecomposition(rank=rank)
# C, U, R = cur.fit_transform(M)
# cur_errors.append(np.linalg.norm(M - C @ U @ R))
# pbar.update(1)

# f,a = plt.subplots(2, 1, figsize=(10, 10))
# # a[0].plot(ranks, svd_errors, label='svd')
# a[1].plot(ranks, cur_errors, label='cur')
# a[0].set_title('svd')
# a[1].set_title('cur')
# plt.legend()
# plt.show()
_ = 1
39 changes: 0 additions & 39 deletions fedot_ind/core/operation/transformation/splitter.py
@@ -212,8 +212,6 @@ def balance_with_non_anomaly(self, series, target, features, non_anomaly_intervals
ts = series.copy()
counter = 0
taken_slots = pd.Series([0 for _ in range(len(ts))])
# for non_anom in non_anomaly_intervals:
# taken_slots[non_anom[0]:non_anom[1]] = 0

while len(non_anomaly_ts_list) != number_of_anomalies and counter != number_of_anomalies * 100:
seed = np.random.randint(1000)
@@ -285,40 +283,3 @@ def _transform_test(self, series: np.array):
transformed_data.append(series_part)
transformed_data = np.stack(transformed_data)
return transformed_data


if __name__ == '__main__':
uni_ts = np.random.rand(800)
anomaly_d_uni = {'anomaly1': [[40, 50], [60, 80], [200, 220], [410, 420], [513, 524], [641, 645]],
'anomaly2': [[130, 170], [300, 320], [400, 410], [589, 620], [715, 720]],
'anomaly3': [[500, 530], [710, 740]],
'anomaly4': [[77, 90], [98, 112], [145, 158], [290, 322]]}

ts1 = np.arange(0, 100)
multi_ts = np.array([ts1, ts1 * 2, ts1 * 3]).T
anomaly_d_multi = {'anomaly1': [[0, 5], [15, 20], [22, 24], [55, 63], [70, 90]],
'anomaly2': [[10, 12], [15, 16], [27, 31], [44, 50], [98, 100]],
'anomaly3': [[0, 3], [15, 18], [19, 24], [55, 60], [85, 90]]}
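
    # Format note (inferred from both dicts above): each key is a class label
    # mapped to a list of [start, stop] index intervals marking anomaly windows.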

splitter_multi = TSTransformer()
train_multi, test_multi = splitter_multi.transform_for_fit(series=multi_ts,
anomaly_dict=anomaly_d_multi, plot=False, binarize=True)

splitter_uni = TSTransformer()
train_uni, test_uni = splitter_uni.transform_for_fit(series=uni_ts,
anomaly_dict=anomaly_d_uni, plot=True, binarize=True)

unique_ts = np.random.rand(800)
anomaly_unique = {
        'class1': [[0, 10], [20, 30], [50, 60], [70, 80], [100, 110], [120, 130], [160, 170], [200, 210],
                   [310, 330], [350, 370], [410, 430], [460, 480], [500, 520], [540, 560], [590, 610],
                   [630, 650], [680, 700], [720, 740], [760, 780], [80, 100], [320, 340]],
'class2': [[0, 20], [50, 70], [100, 120], [140, 160], [190, 210], [230, 250], [270, 290], [240, 250],
[270, 280], [330, 340], [360, 370], [400, 410], [440, 450], [480, 490], [520, 530], [570, 580],
[610, 620], [660, 670], [700, 710]]}

# splitter_unique = TSTransformer(strategy='unique')
# unique_cls, unique_train, unique_test = splitter_unique.transform_for_fit(series=unique_ts,
# anomaly_dict=anomaly_unique, plot=True,
# binarize=False)
_ = 1
47 changes: 1 addition & 46 deletions fedot_ind/tools/explain/distances.py
@@ -1,9 +1,7 @@
import numpy as np
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import cramervonmises
from scipy.stats import energy_distance
from scipy.stats import entropy
from scipy.stats import ks_2samp


def kl_divergence(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
@@ -45,34 +43,6 @@ def total_variation_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
return 0.5 * np.sum(np.abs(probs_before - probs_after))
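
# Worked example: for p = (0.5, 0.5) and q = (0.9, 0.1),
# TV(p, q) = 0.5 * (|0.5 - 0.9| + |0.5 - 0.1|) = 0.5 * 0.8 = 0.4.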


def cramer_von_mises_statistic(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
The Cramer-von Mises statistic tests the goodness-of-fit of two samples, measuring the
similarity of their distributions.
Args:
probs_before: The probability distribution before some event.
probs_after: The probability distribution after the same event.
"""
_, p_value = cramervonmises(probs_before, cdf='uniform')
return p_value


def kolmogorov_smirnov_statistic(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
The Kolmogorov-Smirnov statistic tests the equality of two samples, measuring the maximum
difference between their empirical cumulative distribution functions.
Args:
probs_before: The probability distribution before some event.
probs_after: The probability distribution after the same event.
"""
_, p_value = ks_2samp(probs_before, probs_after)
return p_value


def energy_distance_measure(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
Energy Distance measures the distance between the characteristic functions of two distributions.
@@ -97,18 +67,6 @@ def hellinger_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
return np.sqrt(np.sum((np.sqrt(probs_before) - np.sqrt(probs_after)) ** 2)) / np.sqrt(2)
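
# Worked example: for disjoint p = (1, 0) and q = (0, 1),
# H(p, q) = sqrt((1 - 0)^2 + (0 - 1)^2) / sqrt(2) = sqrt(2) / sqrt(2) = 1, the maximum.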


def bhattacharyya_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
Bhattacharyya Distance measures the similarity between two probability distributions.
Args:
probs_before: The probability distribution before some event.
probs_after: The probability distribution after the same event.
"""
return -np.log(np.sum(np.sqrt(probs_before * probs_after)))


def cosine_distance(probs_before: np.ndarray, probs_after: np.ndarray) -> float:
"""
Cosine Distance measures the cosine of the angle between two vectors, indicating their similarity.
@@ -145,11 +103,8 @@ def rmse(p, q):
cosine=cosine_distance,
euclidean=euclidean_distance,
hellinger=hellinger_distance,
# bhattacharyya=bhattacharyya_distance,
energy=energy_distance_measure,
# kolmogorov=kolmogorov_smirnov_statistic,
# cramer=cramer_von_mises_statistic,
# total_variation=total_variation_distance,
total_variation=total_variation_distance,
jensen_shannon=jensen_shannon_divergence,
kl_div=kl_divergence,
cross_entropy=cross_entropy,