This repository has been archived by the owner on Jun 17, 2022. It is now read-only.

Update 0.0.78
- Fixed issues with LGBM and Catboost classifier tuning
- Minor Optimizations
- Enabled Garbage Collection for optuna
- Changed the default optuna sampler from TPE to multivariate TPE (sketched just below the changed-files summary)

Signed-off-by: Arsh Anwar <lucifer78908@gmail.com>
d4rk-lucif3r committed Jan 6, 2022
1 parent be4086b commit c5b55cb
Showing 8 changed files with 33 additions and 44 deletions.
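
The two optuna-related bullets boil down to the following minimal sketch; only the optuna calls themselves (TPESampler(multivariate=True), gc_after_trial=True) come from this commit, while the toy objective and variable names are illustrative:

    import optuna
    from optuna.samplers import TPESampler

    def objective(trial):
        # toy stand-in; the real objectives live in luciferml's tuner package
        x = trial.suggest_float("x", -10, 10)
        return -((x - 2) ** 2)

    # new default: multivariate TPE instead of independent per-parameter TPE
    sampler = TPESampler(multivariate=True)
    study = optuna.create_study(direction="maximize", sampler=sampler)
    # gc_after_trial=True makes optuna run the garbage collector after every trial
    study.optimize(objective, n_trials=100, gc_after_trial=True)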
4 changes: 2 additions & 2 deletions luciferml/supervised/classification.py
@@ -45,7 +45,7 @@ def __init__(
verbose=False,
exclude_models=[],
path=None,
optuna_sampler=TPESampler(),
optuna_sampler=TPESampler(multivariate=True),
optuna_direction="maximize",
optuna_n_trials=100,
optuna_metric="accuracy",
@@ -343,7 +343,7 @@ def fit(self, features, labels):
if self.predictor == "ann":
self.y_pred = (self.y_pred > 0.5).astype("int32")
self.accuracy = accuracy_score(self.y_val, self.y_pred)
print(Fore.CYAN + " Validation Accuracy is :", self.accuracy * 100)
print(Fore.CYAN + " Validation Accuracy is : {:.2f} %".format(self.accuracy * 100))
self.classifier_name, self.kfold_accuracy = kfold(
self.classifier,
self.predictor,
2 changes: 1 addition & 1 deletion luciferml/supervised/regression.py
@@ -40,7 +40,7 @@ def __init__(
verbose=False,
exclude_models=[],
path=None,
optuna_sampler=TPESampler(),
optuna_sampler=TPESampler(multivariate=True),
optuna_direction="maximize",
optuna_n_trials=100,
optuna_metric="r2",
7 changes: 7 additions & 0 deletions luciferml/supervised/utils/predictors.py
@@ -1,3 +1,4 @@
from tkinter import N
from catboost import CatBoostClassifier, CatBoostRegressor
from colorama import Fore
from lightgbm import LGBMClassifier, LGBMRegressor
@@ -191,7 +192,13 @@ def classification_predictor(
elif predictor == "xgb":
if mode == "single":
print(Fore.YELLOW + "Training XGBClassifier on Training Set [*]\n")
if verbose:
params["verbosity"] = 2
if not verbose:
params["verbosity"] = 0

classifier = XGBClassifier(**params)
params.pop("verbosity")
objective_to_be_tuned = objective.xgb_classifier_objective

elif predictor == "ann":
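
The verbosity handling added above follows a simple pattern: map the user-facing verbose flag onto XGBoost's integer verbosity level, build the estimator, then drop the key so the same params dict can be reused by the tuning objective. A minimal sketch of that pattern, with an illustrative function name that is not part of luciferml:

    from xgboost import XGBClassifier

    def build_xgb(params, verbose=False):
        # XGBoost's sklearn wrapper takes an integer verbosity level (0 = silent)
        params["verbosity"] = 2 if verbose else 0
        classifier = XGBClassifier(**params)
        # drop the key again so the same dict can be handed to the tuning objective
        params.pop("verbosity")
        return classifier

    clf = build_xgb({"n_estimators": 100}, verbose=True)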
2 changes: 1 addition & 1 deletion luciferml/supervised/utils/tuner/luciferml_tuner.py
@@ -57,4 +57,4 @@ def luciferml_tuner(
return best_params, best_score, model
except Exception as error:
print(Fore.RED + "HyperParam Tuning Failed with Error: ", error, "\n")
traceback.print_exc()
return None, 0, None
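
The change above replaces the traceback print with a sentinel return, so a failed tuning run degrades gracefully instead of raising later. A hedged sketch of how a caller might handle that sentinel (all names here are illustrative, not luciferml's API):

    def tune_or_fallback(tuner, default_model):
        # `tuner` is assumed to mirror luciferml_tuner's (best_params, best_score, model) return
        best_params, best_score, model = tuner()
        if model is None:  # tuning failed and (None, 0, None) came back
            return {}, 0, default_model
        return best_params, best_score, model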
Changes in luciferml/supervised/utils/tuner/optuna/objectives/…
@@ -325,39 +325,16 @@ def extc_classifier_objective(self, trial):

def lgbm_classifier_objective(self, trial):
param = {
"n_estimators": trial.suggest_int("n_estimators", 1, 100),
"n_estimators": trial.suggest_int("n_estimators", 1, 1000),
"learning_rate": trial.suggest_loguniform("learning_rate", 1e-5, 1e5),
"boosting_type": trial.suggest_categorical(
"boosting_type", ["gbdt", "goss", "dart", "rf"]
),
"num_leaves": trial.suggest_int("num_leaves", 1, 10),
"max_depth": trial.suggest_int("max_depth", 1, 10),
"min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 10),
"min_child_samples": trial.suggest_int("min_child_samples", 1, 10),
"min_child_weight": trial.suggest_float("min_child_weight", 0, 0.5),
"max_bin": trial.suggest_int("max_bin", 1, 10),
"min_child_weight": trial.suggest_uniform("min_child_weight", 0, 0.5),
"subsample": trial.suggest_uniform("subsample", 0.1, 1),
"subsample_freq": trial.suggest_int("subsample_freq", 1, 10),
"colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.1, 1),
"subsample_for_bin": trial.suggest_int("subsample_for_bin", 1, 10),
"reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-5, 1e5),
"reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-5, 1e5),
"metric": trial.suggest_categorical(
"metric",
[
"binary_error",
"binary_logloss",
"auc",
"binary_precision",
"binary_recall",
"multi_error",
"multi_logloss",
"multi_precision",
"multi_recall",
],
),
"objective": self.lgbm_objective,
"random_state": self.random_state,
"objective": self.lgbm_objective,
"n_jobs": -1,
}
clf = LGBMClassifier(**param)
@@ -373,19 +350,23 @@ def lgbm_classifier_objective(self, trial):

def cat_classifier_objective(self, trial):
param = {
"iterations": trial.suggest_int("iterations", 4000, 25000),
"od_wait": trial.suggest_int("od_wait", 500, 2300),
"learning_rate": trial.suggest_uniform("learning_rate", 0.01, 1),
"reg_lambda": trial.suggest_uniform("reg_lambda", 1e-5, 100),
"subsample": trial.suggest_uniform("subsample", 0, 1),
"random_strength": trial.suggest_uniform("random_strength", 10, 50),
"depth": trial.suggest_int("depth", 1, 15),
"min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 30),
"leaf_estimation_iterations": trial.suggest_int(
"leaf_estimation_iterations", 1, 15
"objective": trial.suggest_categorical("objective", ["Logloss", "CrossEntropy"]),
"iterations": trial.suggest_int("iterations", 100, 3000),
"colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.01, 0.1),
"depth": trial.suggest_int("depth", 1, 12),
"boosting_type": trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
"bootstrap_type": trial.suggest_categorical(
"bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
),
"random_seed": self.random_state,
"used_ram_limit": "3gb",
"learning_rate": trial.suggest_loguniform("learning_rate", 1e-5, 1e5),
"random_state": self.random_state,
}

if param["bootstrap_type"] == "Bayesian":
param["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
elif param["bootstrap_type"] == "Bernoulli":
param["subsample"] = trial.suggest_float("subsample", 0.1, 1)
clf = CatBoostClassifier(**param)
scores = cross_val_score(
clf,
@@ -410,6 +391,7 @@ def xgb_classifier_objective(self, trial):
"reg_lambda": trial.suggest_uniform("reg_lambda", 0, 1),
"random_state": self.random_state,
"n_jobs": -1,

}
clf = XGBClassifier(**param)
scores = cross_val_score(
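
The new CatBoost search space is also an example of a conditional search space: parameters that only apply to a particular bootstrap_type are suggested after that choice is made. A minimal self-contained sketch of the same idea, using a toy dataset rather than this library's pipeline:

    import optuna
    from catboost import CatBoostClassifier
    from sklearn.datasets import make_classification
    from sklearn.model_selection import cross_val_score

    X, y = make_classification(n_samples=200, random_state=0)

    def cat_objective(trial):
        params = {
            "iterations": trial.suggest_int("iterations", 100, 500),
            "depth": trial.suggest_int("depth", 1, 8),
            "bootstrap_type": trial.suggest_categorical(
                "bootstrap_type", ["Bayesian", "Bernoulli", "MVS"]
            ),
            "verbose": 0,
        }
        # conditional parameters: only valid for the chosen bootstrap type
        if params["bootstrap_type"] == "Bayesian":
            params["bagging_temperature"] = trial.suggest_float("bagging_temperature", 0, 10)
        elif params["bootstrap_type"] == "Bernoulli":
            params["subsample"] = trial.suggest_float("subsample", 0.1, 1)
        clf = CatBoostClassifier(**params)
        return cross_val_score(clf, X, y, cv=3).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(cat_objective, n_trials=20)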
2 changes: 1 addition & 1 deletion luciferml/supervised/utils/tuner/optuna/optuna_base.py
@@ -9,7 +9,7 @@ def __init__(self, sampler, n_trials=100, direction="maximize"):

def tune(self, objective):
study = optuna.create_study(direction=self.direction, sampler=self.sampler)
study.optimize(objective, n_trials=self.n_trials)
study.optimize(objective, n_trials=self.n_trials, n_jobs=-1, gc_after_trial=True)
params = study.best_params
best_score = study.best_value
return params, best_score
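
Besides gc_after_trial=True, which makes optuna run Python's garbage collector after each trial to keep long studies from accumulating memory, this change also passes n_jobs=-1, which runs trials concurrently in a thread pool using all available cores.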
2 changes: 1 addition & 1 deletion luciferml/supervised/utils/validator.py
@@ -54,7 +54,7 @@ def kfold(model, predictor, X_train, y_train, cv_folds, isReg=False, all_mode=Fa
)
if not all_mode:
if not isReg:
print(" Accuracy: {:.2f} %".format(accuracies.mean() * 100))
print(" KFold Accuracy: {:.2f} %".format(accuracies.mean() * 100))
if isReg:
print(" R2 Score: {:.2f} %".format(accuracies.mean() * 100))
model_name = name[predictor]
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@
"luciferml.supervised.utils.tuner.optuna",
"luciferml.supervised.utils.tuner.optuna.objectives",
],
version="0.0.77",
version="0.0.78",
license="MIT",
description="Automated ML by d4rk-lucif3r",
long_description=long_description,
