This repository has been archived by the owner on Jun 17, 2022. It is now read-only.

Update 0.0.78
- Fixed issues with LGBM and Catboost classifier tuning
- Minor Optimizations
- Enabled Garbage Collection for optuna
- Changed the default optuna sampler from TPE to multivariate TPE (sketched just below the changed-files summary)

Signed-off-by: Arsh Anwar <lucifer78908@gmail.com>
d4rk-lucif3r committed Jan 6, 2022
1 parent be4086b commit c5b55cb
Showing 8 changed files with 33 additions and 44 deletions.
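
The two optuna-related bullets boil down to the following minimal sketch; only the optuna calls themselves (TPESampler(multivariate=True), gc_after_trial=True) come from this commit, while the toy objective and variable names are illustrative:

    import optuna
    from optuna.samplers import TPESampler

    def objective(trial):
        # toy stand-in; the real objectives live in luciferml's tuner package
        x = trial.suggest_float("x", -10, 10)
        return -((x - 2) ** 2)

    # new default: multivariate TPE instead of independent per-parameter TPE
    sampler = TPESampler(multivariate=True)
    study = optuna.create_study(direction="maximize", sampler=sampler)
    # gc_after_trial=True makes optuna run the garbage collector after every trial
    study.optimize(objective, n_trials=100, gc_after_trial=True)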
4 changes: 2 additions & 2 deletions luciferml/supervised/classification.py
@@ -45,7 +45,7 @@ def __init__(
verbose=False,
exclude_models=[],
path=None,
optuna_sampler=TPESampler(),
optuna_sampler=TPESampler(multivariate=True),
optuna_direction="maximize",
optuna_n_trials=100,
optuna_metric="accuracy",
@@ -343,7 +343,7 @@ def fit(self, features, labels):
if self.predictor == "ann":
self.y_pred = (self.y_pred > 0.5).astype("int32")
self.accuracy = accuracy_score(self.y_val, self.y_pred)
print(Fore.CYAN + " Validation Accuracy is :", self.accuracy * 100)
print(Fore.CYAN + " Validation Accuracy is : {:.2f} %".format(self.accuracy * 100))
self.classifier_name, self.kfold_accuracy = kfold(
self.classifier,
self.predictor,
2 changes: 1 addition & 1 deletion luciferml/supervised/regression.py
@@ -40,7 +40,7 @@ def __init__(
verbose=False,
exclude_models=[],
path=None,
optuna_sampler=TPESampler(),
optuna_sampler=TPESampler(multivariate=True),
optuna_direction="maximize",
optuna_n_trials=100,
optuna_metric="r2",
7 changes: 7 additions & 0 deletions luciferml/supervised/utils/predictors.py
@@ -1,3 +1,4 @@
from tkinter import N
from catboost import CatBoostClassifier, CatBoostRegressor
from colorama import Fore
from lightgbm import LGBMClassifier, LGBMRegressor
@@ -191,7 +192,13 @@ def classification_predictor(
elif predictor == "xgb":
if mode == "single":
print(Fore.YELLOW + "Training XGBClassifier on Training Set [*]\n")
if verbose:
params["verbosity"] = 2
if not verbose:
params["verbosity"] = 0

classifier = XGBClassifier(**params)
params.pop("verbosity")
objective_to_be_tuned = objective.xgb_classifier_objective

elif predictor == "ann":
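
The verbosity handling added above follows a simple pattern: map the user-facing verbose flag onto XGBoost's integer verbosity level, build the estimator, then drop the key so the same params dict can be reused by the tuning objective. A minimal sketch of that pattern, with an illustrative function name that is not part of luciferml:

    from xgboost import XGBClassifier

    def build_xgb(params, verbose=False):
        # XGBoost's sklearn wrapper takes an integer verbosity level (0 = silent)
        params["verbosity"] = 2 if verbose else 0
        classifier = XGBClassifier(**params)
        # drop the key again so the same dict can be handed to the tuning objective
        params.pop("verbosity")
        return classifier

    clf = build_xgb({"n_estimators": 100}, verbose=True)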
2 changes: 1 addition & 1 deletion luciferml/supervised/utils/tuner/luciferml_tuner.py
@@ -57,4 +57,4 @@ def luciferml_tuner(
return best_params, best_score, model
except Exception as error:
print(Fore.RED + "HyperParam Tuning Failed with Error: ", error, "\n")
traceback.print_exc()
return None, 0, None
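
The change above replaces the traceback print with a sentinel return, so a failed tuning run degrades gracefully instead of raising later. A hedged sketch of how a caller might handle that sentinel (all names here are illustrative, not luciferml's API):

    def tune_or_fallback(tuner, default_model):
        # `tuner` is assumed to mirror luciferml_tuner's (best_params, best_score, model) return
        best_params, best_score, model = tuner()
        if model is None:  # tuning failed and (None, 0, None) came back
            return {}, 0, default_model
        return best_params, best_score, model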
Changes in luciferml/supervised/utils/tuner/optuna/objectives/…
@@ -325,39 +325,16 @@ def extc_classifier_objective(self, trial):

def lgbm_classifier_objective(self, trial):
param = {
"n_estimators": trial.suggest_int("n_estimators", 1, 100),
"n_estimators": trial.suggest_int("n_estimators", 1, 1000),
"learning_rate": trial.suggest_loguniform("learning_rate", 1e-5, 1e5),
"boosting_type": trial.suggest_categorical(
"boosting_type", ["gbdt", "goss", "dart", "rf"]
),
"num_leaves": trial.suggest_int("num_leaves", 1, 10),
"max_depth": trial.suggest_int("max_depth", 1, 10),
"min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 10),
"min_child_samples": trial.suggest_int("min_child_samples", 1, 10),
"min_child_weight": trial.suggest_float("min_child_weight", 0, 0.5),
"max_bin": trial.suggest_int("max_bin", 1, 10),
"min_child_weight": trial.suggest_uniform("min_child_weight", 0, 0.5),
"subsample": trial.suggest_uniform("subsample", 0.1, 1),
"subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
"colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.1, 1),
"subsample_for_bin": trial.suggest_int("subsample_for_bin", 1, 10),
"reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-5, 1e5),
"reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-5, 1e5),
"metric": trial.suggest_categorical(
"metric",
[
"binary_error",
"binary_logloss",
"auc",
"binary_precision",
"binary_recall",
"multi_error",
"multi_logloss",
"multi_precision",
"multi_recall",
],
),
"objective": self.lgbm_objective,
"random_state": self.random_state,
"objective": self.lgbm_objective,
"n_jobs": -1,
}
clf = LGBMClassifier(**param)
@@ -373,19 +350,23 @@ def lgbm_classifier_objective(self, trial):

def cat_classifier_objective(self, trial):
param = {
"iterations": trial.suggest_int("iterations", 4000, 25000),
"od_wait": trial.suggest_int("od_wait", 500, 2300),
"learning_rate": trial.suggest_uniform("learning_rate", 0.01, 1),
"reg_lambda": trial.suggest_uniform("reg_lambda", 1e-5, 100),
"subsample": trial.suggest_uniform("subsample", 0, 1),
"random_strength": trial.suggest_uniform("random_strength", 10, 50),
"depth": trial.suggest_int("depth", 1, 15),
"min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 30),
"leaf_estimation_iterations": trial.suggest_int(
"leaf_estimation_iterations", 1, 15
"objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
"iterations": trial.suggest_int("iterations", 100, 3000),
"colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
"depth": trial.suggest_int("depth", 1, 12),
"boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
"bootstrap_type": trial.suggest_categorical(
"bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
),
"random_seed": self.random_state,
"used_ram_limit": "3gb",
"learning_rate": trial.suggest_loguniform("learning_rate", 1e-5, 1e5),
"random_state": self.random_state,
}

if param["bootstrap_type"] == "Bayesian":
param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
elif param["bootstrap_type"] == "Bernoulli":
param["subsample"] = trial.suggest_float("subsample", 0.1, 1)
clf = CatBoostClassifier(**param)
scores = cross_val_score(
clf,
@@ -410,6 +391,7 @@ def xgb_classifier_objective(self, trial):
"reg_lambda": trial.suggest_uniform("reg_lambda", 0, 1),
"random_state": self.random_state,
"n_jobs": -1,

}
clf = XGBClassifier(**param)
scores = cross_val_score(
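
The new CatBoost search space is also an example of a conditional search space: parameters that only apply to a particular bootstrap_type are suggested after that choice is made. A minimal self-contained sketch of the same idea, using a toy dataset rather than this library's pipeline:

    import optuna
    from catboost import CatBoostClassifier
    from sklearn.datasets import make_classification
    from sklearn.model_selection import cross_val_score

    X, y = make_classification(n_samples=200, random_state=0)

    def cat_objective(trial):
        params = {
            "iterations": trial.suggest_int("iterations", 100, 500),
            "depth": trial.suggest_int("depth", 1, 8),
            "bootstrap_type": trial.suggest_categorical(
                "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
            ),
            "verbose": 0,
        }
        # conditional parameters: only valid for the chosen bootstrap type
        if params["bootstrap_type"] == "Bayesian":
            params["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
        elif params["bootstrap_type"] == "Bernoulli":
            params["subsample"] = trial.suggest_float("subsample", 0.1, 1)
        clf = CatBoostClassifier(**params)
        return cross_val_score(clf, X, y, cv=3).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(cat_objective, n_trials=20)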
2 changes: 1 addition & 1 deletion luciferml/supervised/utils/tuner/optuna/optuna_base.py
@@ -9,7 +9,7 @@ def __init__(self, sampler, n_trials=100, direction="maximize"):

def tune(self, objective):
study = optuna.create_study(direction=self.direction, sampler=self.sampler)
study.optimize(objective, n_trials=self.n_trials)
study.optimize(objective, n_trials=self.n_trials, n_jobs=-1, gc_after_trial=True)
params = study.best_params
best_score = study.best_value
return params, best_score
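
Besides gc_after_trial=True, which makes optuna run Python's garbage collector after each trial to keep long studies from accumulating memory, this change also passes n_jobs=-1, which runs trials concurrently in a thread pool using all available cores.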
2 changes: 1 addition & 1 deletion luciferml/supervised/utils/validator.py
@@ -54,7 +54,7 @@ def kfold(model, predictor, X_train, y_train, cv_folds, isReg=False, all_mode=Fa
)
if not all_mode:
if not isReg:
print(" Accuracy: {:.2f} %".format(accuracies.mean() * 100))
print(" KFold Accuracy: {:.2f} %".format(accuracies.mean() * 100))
if isReg:
print(" R2 Score: {:.2f} %".format(accuracies.mean() * 100))
model_name = name[predictor]
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
"luciferml.supervised.utils.tuner.optuna",
"luciferml.supervised.utils.tuner.optuna.objectives",
],
version="0.0.77",
version="0.0.78",
license="MIT",
description="Automated ML by d4rk-lucif3r",
long_description=long_description,
