From 74b885aa23267c93166765db1f3412a4afee069d Mon Sep 17 00:00:00 2001 From: Viktor Krylov <119884857+PvtKaefsky@users.noreply.github.com> Date: Fri, 17 May 2024 14:49:04 +0300 Subject: [PATCH] Update default params based on tuning search space (#137) * Update default params and tuning search space --------- Co-authored-by: technocreep --- .../core/models/manifold/riemann_embeding.py | 42 +- .../models/recurrence/reccurence_extractor.py | 5 +- .../operation/transformation/basis/fourier.py | 2 +- .../data/default_operation_params.json | 419 +++++++++++------- fedot_ind/core/tuning/search_space.py | 169 +++---- 5 files changed, 350 insertions(+), 287 deletions(-) diff --git a/fedot_ind/core/models/manifold/riemann_embeding.py b/fedot_ind/core/models/manifold/riemann_embeding.py index f3f1c8347..b3c6fc1f2 100644 --- a/fedot_ind/core/models/manifold/riemann_embeding.py +++ b/fedot_ind/core/models/manifold/riemann_embeding.py @@ -17,7 +17,7 @@ class RiemannExtractor(BaseExtractor): Attributes: estimator (str): estimator for covariance matrix, 'corr', 'cov', 'lwf', 'mcd', 'hub' - distance_metric (str): metric for tangent space, 'riemann', 'logeuclid', 'euclid' + tangent_metric (str): metric for tangent space, 'riemann', 'logeuclid', 'euclid' Example: To use this class you need to import it and call needed methods:: @@ -45,40 +45,38 @@ def __init__(self, params: Optional[OperationParameters] = None): 'tangent': self.extract_riemann_features, 'ensemble': self._ensemble_features} - self.n_filter = params.get('nfilter', 2) self.estimator = params.get('estimator', 'scm') - self.covariance_metric = params.get('SPD_metric', 'riemann') - self.distance_metric = params.get('tangent_metric', 'riemann') - self.extraction_strategy = params.get( - 'extraction_strategy ', 'ensemble') - - self.covariance_transformer = params.get('SPD_space', None) - self.tangent_projector = params.get('tangent_space', None) - if np.any([self.covariance_transformer, self.tangent_projector]) is None: + self.spd_metric = params.get('SPD_metric', 'riemann') + self.tangent_metric = params.get('tangent_metric', 'riemann') + self.extraction_strategy = 'ensemble' + + self.spd_space = params.get('SPD_space', None) + self.tangent_space = params.get('tangent_space', None) + if np.any([self.spd_space, self.tangent_space]) is None: self._init_spaces() self.fit_stage = True self.extraction_func = extraction_dict[self.extraction_strategy] self.logging_params.update({ 'estimator': self.estimator, - 'tangent_space_metric': self.distance_metric, - 'SPD_space_metric': self.covariance_metric}) + 'tangent_space_metric': self.tangent_metric, + 'SPD_space_metric': self.spd_metric}) def _init_spaces(self): - self.covariance_transformer = Covariances(estimator='scm') - self.tangent_projector = TangentSpace(metric=self.distance_metric) + self.spd_space = Covariances(estimator='scm') + self.tangent_space = TangentSpace(metric=self.tangent_metric) self.shrinkage = Shrinkage() def extract_riemann_features(self, input_data: InputData) -> InputData: if not self.fit_stage: - SPD = self.covariance_transformer.transform(input_data.features) + SPD = self.spd_space.transform(input_data.features) SPD = self.shrinkage.transform(SPD) - ref_point = self.tangent_projector.transform(SPD) + ref_point = self.tangent_space.transform(SPD) else: - SPD = self.covariance_transformer.fit_transform( + SPD = self.spd_space.fit_transform( input_data.features, input_data.target) SPD = self.shrinkage.fit_transform(SPD) - ref_point = self.tangent_projector.fit_transform(SPD) + ref_point = self.tangent_space.fit_transform(SPD) self.fit_stage = False self.classes_ = np.unique(input_data.target) return ref_point @@ -86,19 +84,19 @@ def extract_riemann_features(self, input_data: InputData) -> InputData: def extract_centroid_distance(self, input_data: InputData): input_data.target = input_data.target.astype(int) if self.fit_stage: - SPD = self.covariance_transformer.fit_transform( + SPD = self.spd_space.fit_transform( input_data.features, input_data.target) SPD = self.shrinkage.transform(SPD) else: - SPD = self.covariance_transformer.transform(input_data.features) + SPD = self.spd_space.transform(input_data.features) SPD = self.shrinkage.fit_transform(SPD) self.covmeans_ = [mean_covariance(SPD[np.array(input_data.target == ll).flatten()], - metric=self.covariance_metric) for ll in self.classes_] + metric=self.spd_metric) for ll in self.classes_] n_centroids = len(self.covmeans_) - dist = [distance(SPD, self.covmeans_[m], self.distance_metric) + dist = [distance(SPD, self.covmeans_[m], self.tangent_metric) for m in range(n_centroids)] dist = np.concatenate(dist, axis=1) feature_matrix = softmax(-dist ** 2) diff --git a/fedot_ind/core/models/recurrence/reccurence_extractor.py b/fedot_ind/core/models/recurrence/reccurence_extractor.py index 1cc1acb6e..d8a1a0b42 100644 --- a/fedot_ind/core/models/recurrence/reccurence_extractor.py +++ b/fedot_ind/core/models/recurrence/reccurence_extractor.py @@ -5,7 +5,7 @@ from fedot.core.operations.operation_parameters import OperationParameters from fedot.core.repository.dataset_types import DataTypesEnum -from fedot_ind.core.metrics.metrics_implementation import * +#from fedot_ind.core.metrics.metrics_implementation import * from fedot_ind.core.models.base_extractor import BaseExtractor from fedot_ind.core.models.recurrence.sequences import RecurrenceFeatureExtractor from fedot_ind.core.operation.transformation.data.hankel import HankelMatrix @@ -45,9 +45,8 @@ def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.window_size = params.get('window_size', 0) self.stride = params.get('stride', 1) - self.rec_metric = params.get('rec_metric', 'cosine') + self.rec_metric = params.get('rec_metric', 'cosine') # TODO add threshold for other metrics self.image_mode = params.get('image_mode', False) - self.rec_metric = 'cosine' # TODO add threshold for other metrics self.transformer = TSTransformer self.extractor = RecurrenceFeatureExtractor diff --git a/fedot_ind/core/operation/transformation/basis/fourier.py b/fedot_ind/core/operation/transformation/basis/fourier.py index 229c6bf9e..389b667ea 100644 --- a/fedot_ind/core/operation/transformation/basis/fourier.py +++ b/fedot_ind/core/operation/transformation/basis/fourier.py @@ -25,7 +25,7 @@ def __repr__(self): def __init__(self, params: Optional[OperationParameters] = None): super().__init__(params) self.threshold = params.get('threshold') - self.approximation = params.get('approximation', 'smooth') + self.approximation = 'smooth' self.basis = None self.logging_params.update({'threshold': self.threshold}) diff --git a/fedot_ind/core/repository/data/default_operation_params.json b/fedot_ind/core/repository/data/default_operation_params.json index 1df469e80..2757abaf2 100644 --- a/fedot_ind/core/repository/data/default_operation_params.json +++ b/fedot_ind/core/repository/data/default_operation_params.json @@ -1,79 +1,195 @@ { + "eigen_basis": { + "window_size": 20, + "rank_regularization": "hard_thresholding", + "low_rank_approximation": true, + "tensor_approximation": false + }, + "wavelet_basis": { + "n_components": 2, + "wavelet": "mexh" + }, + "fourier_basis": { + "threshold": 20000 + }, + "topological_extractor": { + "window_size_as_share": 0.33, + "max_homology_dimension": 1, + "metric": "euclidean", + "window_size": 25, + "stride": 5 + }, + "quantile_extractor": { + "stride": 1, + "window_size": 0 + }, + "riemann_extractor": { + "estimator": "scm", + "tangent_metric": "riemann", + "SPD_metric": "riemann" + }, + "recurrence_extractor": { + "window_size": 0, + "stride": 1, + "rec_metric": "cosine", + "image_mode": false + }, + "minirocket_extractor": { + "num_features": 10000 + }, + "chronos_extractor": { + "num_features": 10000 + }, + "channel_filtration": { + "distance": "euclidean", + "shrink": 1e-5, + "centroid_metric": "euclidean", + "selection_strategy": "sum" + }, + "patch_tst_model": { + "epochs": 100, + "batch_size": 32, + "activation": "ReLU", + "learning_rate": 0.001, + "use_amp": false, + "forecast_length": null, + "patch_len": null, + "ouput_attention": false, + "forecast_mode": "out_of_sample" + }, + "omniscale_model": { + "epochs": 100, + "batch_size": 32, + "activation": "Softmax", + "num_classes": 1 + }, + "inception_model": { + "epochs": 100, + "batch_size": 32, + "activation": "Softmax", + "num_classes": 1 + }, + "resnet_model": { + "epochs": 100, + "batch_size": 32, + "activation": "Softmax", + "model_name": "ResNet18" + }, + "ssa_forecaster": { + "window_size_method": "hac", + "history_lookback": 30 + }, + "kmeans": { + "n_clusters": 4 + }, + "adareg": { + "learning_rate": 0.1, + "loss": "square" + }, + "gbr": { + "loss": "quantile", + "learning_rate": 0.1, + "max_depth": 5, + "min_samples_split": 10, + "min_samples_leaf": 10, + "subsample": 0.5, + "max_features": 0.9, + "alpha": 0.85 + }, + "logit": { + "C": 1, + "penalty": "l2", + "solver": "liblinear" + }, "rf": { "criterion":"gini", - "max_features":0.9, - "min_samples_split":5, - "min_samples_leaf":5, - "bootstrap":false + "max_features": 0.9, + "min_samples_split": 5, + "min_samples_leaf": 5, + "bootstrap": false + }, + "ridge": { + "alpha": 1.0 }, - "mlp":{ - "max_iter":300, - "activation":"relu", - "solver":"adam" + "lasso": { + "alpha": 1.0 }, "rfr": { - "max_features":0.9, - "min_samples_split":5, - "min_samples_leaf":5, - "bootstrap":false + "max_features": 0.9, + "min_samples_split": 5, + "min_samples_leaf": 5, + "bootstrap": false + }, + "xgbreg": { + "max_depth": 5, + "learning_rate": 0.1, + "subsample": 0.5, + "min_child_weight": 10 }, "xgboost": { "n_estimators": 500, - "learning_rate": 0.1, "max_depth": 5, + "learning_rate": 0.1, + "subsample": 0.5, + "min_weight_fraction_leaf": 0.25, + "min_samples_leaf": 0.5, + "min_samples_split": 0.5, "random_state": 42 }, - "catboost": { - "allow_writing_files": false, - "verbose": false + "svr": { + "C": 1.0, + "epsilon": 0.1, + "tol": 1e-3, + "loss": "squared_epsilon_insensitive" }, - "catboostreg": { - "allow_writing_files": false, - "verbose": false + "dtreg": { + "max_depth": 5, + "min_samples_split": 10, + "min_samples_leaf": 10 }, - "lgbm": { - "num_leaves": 32, - "colsample_bytree": 0.8, - "subsample": 0.8, - "subsample_freq": 10, - "learning_rate": 0.03, - "n_estimators": 100 + "treg": { + "max_features": 0.5, + "min_samples_split": 10, + "min_samples_leaf": 10, + "bootstrap": false }, - "lgbmreg": { - "num_leaves": 32, - "colsample_bytree": 0.8, - "subsample": 0.8, - "subsample_freq": 10, - "learning_rate": 0.03, - "n_estimators": 100 + "dt": { + "max_depth": 5, + "min_samples_split": 10, + "min_samples_leaf": 10 }, - "lagged": { - "window_size": 10 + "knnreg": { + "n_neighbors": 5, + "weights": "uniform", + "p": 1 }, - "diff_filter": { - "window_size": 3, - "poly_degree": 2, - "order": 1 + "knn": { + "n_neighbors": 5, + "weights": "uniform", + "p": 1 }, - "cut": { - "cut_part": 0.5 + "arima": { + "p": 2, + "d": 0, + "q": 2 }, - "sparse_lagged": { - "window_size": 10, - "n_components": 0.5, - "sparse_transform": true, - "use_svd": false + "stl_arima": { + "p": 2, + "d": 0, + "q": 2, + "period": 30 + }, + "mlp": { + "activation": "relu", + "max_iter": 300, + "learning_rate": "constant", + "solver": "adam" }, "ar": { "lag_1": 7, "lag_2": 12, "trend": "c" }, - "arima": { - "p": 2, - "d": 0, - "q": 2 - }, "ets": { "error": "add", "trend": null, @@ -81,25 +197,10 @@ "damped_trend": false, "seasonal_periods": 7 }, - "polyfit": { - "degree": 2 - }, "glm": { "family": "gaussian", "link": "identity" }, - "ransac_lin_reg": { - "min_samples": 0.4, - "residual_threshold": 10, - "max_trials": 100, - "max_skips": 1000 - }, - "ransac_non_lin_reg": { - "min_samples": 0.4, - "residual_threshold": 10, - "max_trials": 100, - "max_skips": 1000 - }, "cgru": { "hidden_size": 200, "learning_rate": 0.001, @@ -112,38 +213,84 @@ "optimizer": "adamw", "loss": "mse" }, - "h2o_regr": { - "timeout": 20, - "seed": 42, - "max_models": 3 + "pca": { + "svd_solver": "full", + "n_components": 0.7 }, - "h2o_class": { - "timeout": 20, - "seed": 42, - "max_models": 3 + "kernel_pca": { + "n_components": null, + "kernel": "rbf" }, - "tpot_class": { - "timeout": 2, - "generations": 3, - "population_size": 3 + "lagged": { + "window_size": 10 }, - "tpot_regr": { - "timeout": 2, - "generations": 3, - "population_size": 3 + "sparse_lagged": { + "window_size": 10, + "n_components": 0.5, + "use_svd": false, + "sparse_transform": true + }, + "smoothing": { + "window_size": 10 + }, + "gaussian_filter": { + "sigma": 2 + }, + "diff_filter": { + "poly_degree": 2, + "order": 1, + "window_size": 3 + }, + "cut": { + "cut_part": 0.5 + }, + "lgbm": { + "n_estimators": 100, + "class_weight": null, + "num_leaves": 32, + "learning_rate": 0.03, + "colsample_bytree": 0.8, + "subsample": 0.8, + "reg_alpha": 0.1, + "reg_lambda": 0.1, + "subsample_freq": 10 + }, + "lgbmreg": { + "n_estimators": 100, + "num_leaves": 32, + "learning_rate": 0.03, + "colsample_bytree": 0.8, + "subsample": 0.8, + "reg_alpha": 0.1, + "reg_lambda": 0.1, + "subsample_freq": 10 + }, + "catboost": { + "allow_writing_files": false, + "verbose": false, + "max_depth": 5, + "learning_rate": 0.1, + "min_data_in_leaf": 3, + "border_count": 32, + "l2_leaf_reg": 1 + }, + "catboostreg": { + "allow_writing_files": false, + "verbose": false, + "max_depth": 5, + "learning_rate": 0.1, + "min_data_in_leaf": 3, + "border_count": 32, + "l2_leaf_reg": 1 }, "resample": { "balance": "expand_minority", "replace": false, "balance_ratio": 1 }, - "pca": { - "svd_solver": "full", - "n_components": 0.7 - }, - "kernel_pca": { - "n_components": null, - "kernel": "rbf" + "lda": { + "solver": "svd", + "shrinkage": 0.5 }, "ts_naive_average": { "part_for_averaging": 1.0 @@ -158,49 +305,43 @@ "min_df": 0.1, "max_df": 0.9 }, - "fast_ica": { - "whiten": "unit-variance" + "polyfit": { + "degree": 2 }, - "eigen_basis": { - "window_size": 25, - "sv_selector": "median", - "rank_regularization": "hard_thresholding" + "ransac_lin_reg": { + "min_samples": 0.4, + "residual_threshold": 10, + "max_trials": 100, + "max_skips": 1000 }, - "channel_filtration": { - "distance": "euclidean", - "shrink": 1e-5, - "centroid_metric": "euclidean", - "selection_strategy": "pairwise" + "ransac_non_lin_reg": { + "min_samples": 0.4, + "residual_threshold": 10, + "max_trials": 100, + "max_skips": 1000 }, - "wavelet_basis": { - "wavelet": "mexh", - "n_components": 2 + "h2o_regr": { + "timeout": 20, + "seed": 42, + "max_models": 3 }, - "fourier_basis": { - "spectrum_type": "smoothed", - "threshold": 20000 + "h2o_class": { + "timeout": 20, + "seed": 42, + "max_models": 3 }, - "quantile_extractor": { - "window_size": 0, - "window_mode": false, - "var_threshold": 0.01 + "tpot_class": { + "timeout": 2, + "generations": 3, + "population_size": 3 }, - "riemann_extractor": { - "n_filter": 4, - "estimator": "scm", - "tangent_metric": "riemann", - "SPD_metric": "riemann" + "tpot_regr": { + "timeout": 2, + "generations": 3, + "population_size": 3 }, - "recurrence_extractor": { - "window_size": 20, - "window_mode": true, - "min_signal_ratio": 0.5, - "max_signal_ratio": 0.75, - "rec_metric": "euclidean", - "image_mode": false}, - "ssa_forecaster": { - "window_size_method": "hac", - "history_lookback": 30 + "fast_ica": { + "whiten": "unit-variance" }, "fedot_cls": { "timeout": 10, @@ -210,40 +351,12 @@ "timeout": 10, "with_tuning": true }, - "minirocket_extractor": { - "num_features": 10000 - }, - "chronos_extractor": { - "num_features": 10000 - }, - "inception_model": { - "epochs": 100, - "batch_size": 32 - }, - "omniscale_model": { - "epochs": 100, - "batch_size": 32 - }, "tst_model": { "epochs": 100, "batch_size": 32 }, - "resnet_model": { - "epochs": 100, - "batch_size": 32 - }, "xcm_model": { "epochs": 100, "batch_size": 32 - }, - "patch_tst_model": { - "epochs": 100, - "batch_size": 32 - }, - "topological_extractor": { - "n_jobs": 2, - "window_size_as_share": 0.33, - "max_homology_dimension": 1, - "metric": "euclidean" } } \ No newline at end of file diff --git a/fedot_ind/core/tuning/search_space.py b/fedot_ind/core/tuning/search_space.py index f87a1e186..158804fa3 100644 --- a/fedot_ind/core/tuning/search_space.py +++ b/fedot_ind/core/tuning/search_space.py @@ -9,9 +9,10 @@ industrial_search_space = { 'eigen_basis': {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, - 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}, + # 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}, 'rank_regularization': {'hyperopt-dist': hp.choice, 'sampling-scope': [ - ['hard_thresholding', 'explained_dispersion']]}}, + ['hard_thresholding', 'explained_dispersion']]}, + 'low_rank_approximation': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}, 'wavelet_basis': {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]}, 'wavelet': {'hyperopt-dist': hp.choice, @@ -23,8 +24,8 @@ {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}}, 'quantile_extractor': - {'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}, - 'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(0, 50, 3)]]}}, + {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, + 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}}, 'riemann_extractor': {'estimator': {'hyperopt-dist': hp.choice, 'sampling-scope': [['corr', 'cov', 'lwf', 'mcd', 'hub']]}, @@ -34,20 +35,20 @@ 'riemann' ]]}, 'SPD_metric': {'hyperopt-dist': hp.choice, 'sampling-scope': [[ - # 'ale', - # 'alm', 'euclid', 'identity', 'logeuclid', 'riemann']]}}, 'recurrence_extractor': {'window_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(5, 50, 5)]]}, 'stride': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(1, 10, 1)]]}, - # 'rec_metric': (hp.choice, [['chebyshev', 'cosine', 'euclidean', 'mahalanobis']]) - }, - 'signal_extractor': - {'n_components': {'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 10]}, - 'wavelet': {'hyperopt-dist': hp.choice, - 'sampling-scope': [['mexh', 'morl', 'db5', 'sym5']]}}, + 'rec_metric': (hp.choice, [['cosine', 'euclidean']]), + 'image_mode': {'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]]}}, + 'minirocket_extractor': + {'num_features': {'hyperopt-dist': hp.choice, + 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}}, + 'chronos_extractor': + {'num_features': {'hyperopt-dist': hp.choice, + 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}}, 'channel_filtration': {'distance': {'hyperopt-dist': hp.choice, 'sampling-scope': [['manhattan', 'euclidean', 'chebyshev']]}, @@ -59,12 +60,6 @@ 'selection_strategy': {'hyperopt-dist': hp.choice, 'sampling-scope': [['sum', 'pairwise']]} }, - 'minirocket_extractor': - {'num_features': {'hyperopt-dist': hp.choice, - 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}}, - 'chronos_extractor': - {'num_features': {'hyperopt-dist': hp.choice, - 'sampling-scope': [[x for x in range(5000, 20000, 1000)]]}}, 'patch_tst_model': {'epochs': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(10, 100, 10)]]}, 'batch_size': {'hyperopt-dist': hp.choice, 'sampling-scope': [[x for x in range(8, 64, 6)]]}, @@ -94,12 +89,12 @@ def get_industrial_search_space(self): - parameters_per_operation = {'kmeans': { - 'n_clusters': { - 'hyperopt-dist': hp.uniformint, - 'sampling-scope': [2, 7], - 'type': 'discrete'} - }, + parameters_per_operation = { + 'kmeans': { + 'n_clusters': { + 'hyperopt-dist': hp.uniformint, + 'sampling-scope': [2, 7], + 'type': 'discrete'}}, 'adareg': { 'learning_rate': { 'hyperopt-dist': hp.loguniform, @@ -108,8 +103,7 @@ def get_industrial_search_space(self): 'loss': { 'hyperopt-dist': hp.choice, 'sampling-scope': [["linear", "square", "exponential"]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'gbr': { 'loss': { 'hyperopt-dist': hp.choice, @@ -142,8 +136,7 @@ def get_industrial_search_space(self): 'alpha': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.75, 0.99], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'logit': { 'C': { 'hyperopt-dist': hp.uniform, @@ -158,8 +151,7 @@ def get_industrial_search_space(self): 'solver': { 'hyperopt-dist': hp.choice, 'sampling-scope': [['liblinear']], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'rf': { 'criterion': { 'hyperopt-dist': hp.choice, @@ -180,20 +172,17 @@ def get_industrial_search_space(self): 'bootstrap': { 'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'ridge': { 'alpha': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.01, 10.0], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'lasso': { 'alpha': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.01, 10.0], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'rfr': { 'max_features': { 'hyperopt-dist': hp.uniform, @@ -210,8 +199,7 @@ def get_industrial_search_space(self): 'bootstrap': { 'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'xgbreg': { 'max_depth': { 'hyperopt-dist': hp.uniformint, @@ -228,8 +216,7 @@ def get_industrial_search_space(self): 'min_child_weight': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 21], - 'type': 'discrete'}, - }, + 'type': 'discrete'}}, 'xgboost': { 'n_estimators': { 'hyperopt-dist': hp.uniformint, @@ -258,8 +245,7 @@ def get_industrial_search_space(self): 'min_samples_split': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.0, 1.0], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'svr': { 'C': { 'hyperopt-dist': hp.uniform, @@ -276,8 +262,7 @@ def get_industrial_search_space(self): 'loss': { 'hyperopt-dist': hp.choice, 'sampling-scope': [["epsilon_insensitive", "squared_epsilon_insensitive"]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'dtreg': { 'max_depth': { 'hyperopt-dist': hp.uniformint, @@ -290,8 +275,7 @@ def get_industrial_search_space(self): 'min_samples_leaf': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 21], - 'type': 'discrete'} - }, + 'type': 'discrete'}}, 'treg': { 'max_features': { 'hyperopt-dist': hp.uniform, @@ -308,8 +292,7 @@ def get_industrial_search_space(self): 'bootstrap': { 'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'dt': { 'max_depth': { 'hyperopt-dist': hp.uniformint, @@ -322,8 +305,7 @@ def get_industrial_search_space(self): 'min_samples_leaf': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 21], - 'type': 'discrete'} - }, + 'type': 'discrete'}}, 'knnreg': { 'n_neighbors': { 'hyperopt-dist': hp.uniformint, @@ -336,8 +318,7 @@ def get_industrial_search_space(self): 'p': { 'hyperopt-dist': hp.choice, 'sampling-scope': [[1, 2]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'knn': { 'n_neighbors': { 'hyperopt-dist': hp.uniformint, @@ -350,8 +331,7 @@ def get_industrial_search_space(self): 'p': { 'hyperopt-dist': hp.choice, 'sampling-scope': [[1, 2]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'arima': { 'p': { 'hyperopt-dist': hp.uniformint, @@ -364,8 +344,7 @@ def get_industrial_search_space(self): 'q': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 5], - 'type': 'discrete'} - }, + 'type': 'discrete'}}, 'stl_arima': { 'p': { 'hyperopt-dist': hp.uniformint, @@ -382,8 +361,7 @@ def get_industrial_search_space(self): 'period': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [1, 365], - 'type': 'discrete'} - }, + 'type': 'discrete'}}, 'mlp': { 'hidden_layer_sizes': { 'hyperopt-dist': hp.choice, @@ -398,8 +376,7 @@ def get_industrial_search_space(self): 'type': 'discrete'}, 'learning_rate': {'hyperopt-dist': hp.choice, 'sampling-scope': [['constant', 'adaptive']], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'ar': { 'lag_1': { 'hyperopt-dist': hp.uniform, @@ -408,13 +385,11 @@ def get_industrial_search_space(self): 'lag_2': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [2, 800], - 'type': 'continuous'} - , + 'type': 'continuous'}, 'trend': { 'hyperopt-dist': hp.choice, 'sampling-scope': [['n', 'c', 't', 'ct']], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'ets': { 'error': { 'hyperopt-dist': hp.choice, @@ -435,8 +410,7 @@ def get_industrial_search_space(self): 'seasonal_periods': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [1, 100], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'glm': { NESTED_PARAMS_LABEL: { 'hyperopt-dist': hp.choice, @@ -460,8 +434,7 @@ def get_industrial_search_space(self): } ]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'cgru': { 'hidden_size': { 'hyperopt-dist': hp.uniform, @@ -502,8 +475,7 @@ def get_industrial_search_space(self): 'loss': { 'hyperopt-dist': hp.choice, 'sampling-scope': [['mae', 'mse']], - 'type': 'categorical'}, - }, + 'type': 'categorical'}}, 'topological_extractor': { 'window_size_as_share': { 'hyperopt-dist': hp.uniform, @@ -518,14 +490,12 @@ def get_industrial_search_space(self): 'metric': { 'hyperopt-dist': hp.choice, 'sampling-scope': [['euclidean', 'manhattan', 'cosine']], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'pca': { 'n_components': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.1, 0.99], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'kernel_pca': { 'n_components': { 'hyperopt-dist': hp.uniformint, @@ -534,14 +504,12 @@ def get_industrial_search_space(self): 'kernel': { 'hyperopt-dist': hp.choice, 'sampling-scope': [['linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed']], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'lagged': { 'window_size': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [5, 500], - 'type': 'discrete'} - }, + 'type': 'discrete'}}, 'sparse_lagged': { 'window_size': { 'hyperopt-dist': hp.uniformint, @@ -554,20 +522,17 @@ def get_industrial_search_space(self): 'use_svd': { 'hyperopt-dist': hp.choice, 'sampling-scope': [[True, False]], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'smoothing': { 'window_size': { 'hyperopt-dist': hp.uniformint, 'sampling-scope': [2, 20], - 'type': 'discrete'} - }, + 'type': 'discrete'}}, 'gaussian_filter': { 'sigma': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [1, 5], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'diff_filter': { 'poly_degree': { 'hyperopt-dist': hp.uniformint, @@ -580,14 +545,12 @@ def get_industrial_search_space(self): 'window_size': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [3, 20], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'cut': { 'cut_part': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0, 0.9], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'lgbm': { 'class_weight': { 'hyperopt-dist': hp.choice, @@ -616,8 +579,7 @@ def get_industrial_search_space(self): 'reg_lambda': { 'hyperopt-dist': hp.loguniform, 'sampling-scope': [1e-8, 10], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'lgbmreg': { 'num_leaves': { 'hyperopt-dist': hp.uniformint, @@ -642,8 +604,7 @@ def get_industrial_search_space(self): 'reg_lambda': { 'hyperopt-dist': hp.loguniform, 'sampling-scope': [1e-8, 10], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'catboost': { 'max_depth': { 'hyperopt-dist': hp.uniformint, @@ -664,8 +625,7 @@ def get_industrial_search_space(self): 'l2_leaf_reg': { 'hyperopt-dist': hp.loguniform, 'sampling-scope': [1e-8, 10], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'catboostreg': { 'max_depth': { 'hyperopt-dist': hp.uniformint, @@ -686,8 +646,7 @@ def get_industrial_search_space(self): 'l2_leaf_reg': { 'hyperopt-dist': hp.loguniform, 'sampling-scope': [1e-8, 10], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'resample': { 'balance': { 'hyperopt-dist': hp.choice, @@ -700,8 +659,7 @@ def get_industrial_search_space(self): 'balance_ratio': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.3, 1], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'lda': { 'solver': { 'hyperopt-dist': hp.choice, @@ -710,27 +668,23 @@ def get_industrial_search_space(self): 'shrinkage': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.1, 0.9], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'ts_naive_average': { 'part_for_averaging': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.1, 1], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'locf': { 'part_for_repeat': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.01, 0.5], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, 'word2vec_pretrained': { 'model_name': { 'hyperopt-dist': hp.choice, 'sampling-scope': [['glove-twitter-25', 'glove-twitter-50', 'glove-wiki-gigaword-100', 'word2vec-ruscorpora-300']], - 'type': 'categorical'} - }, + 'type': 'categorical'}}, 'tfidf': { 'ngram_range': { 'hyperopt-dist': hp.choice, @@ -743,8 +697,7 @@ def get_industrial_search_space(self): 'max_df': { 'hyperopt-dist': hp.uniform, 'sampling-scope': [0.9, 0.99], - 'type': 'continuous'} - }, + 'type': 'continuous'}}, } for key in industrial_search_space: parameters_per_operation[key] = industrial_search_space[key]