From 77f2b1aec5f565336ac7b4f7e0e5b5595946d023 Mon Sep 17 00:00:00 2001 From: statmlben Date: Fri, 15 Nov 2024 16:24:39 +0800 Subject: [PATCH] add warmstart --- rehline/_base.py | 66 +++++++++++++++++++++ rehline/_class.py | 125 +++++++++++++++++++++++---------------- tests/_test_svm.py | 7 +-- tests/_test_warmstart.py | 72 ++++++++++++++++++++++ 4 files changed, 213 insertions(+), 57 deletions(-) create mode 100644 tests/_test_warmstart.py diff --git a/rehline/_base.py b/rehline/_base.py index 144bd4b..3e47be0 100644 --- a/rehline/_base.py +++ b/rehline/_base.py @@ -73,6 +73,62 @@ def auto_shape(self): self.H = self.S.shape[0] self.K = self.A.shape[0] + def cast_sample_weight(self, sample_weight=None): + """ + Cast the sample weight to the ReHLine parameters. + + Parameters + ---------- + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If None, then samples are equally weighted. + + Returns + ------- + U_weight : array-like of shape (L, n_samples) + Weighted ReLU coefficient matrix. + + V_weight : array-like of shape (L, n_samples) + Weighted ReLU intercept vector. + + Tau_weight : array-like of shape (H, n_samples) + Weighted ReHU cutpoint matrix. + + S_weight : array-like of shape (H, n_samples) + Weighted ReHU coefficient vector. + + T_weight : array-like of shape (H, n_samples) + Weighted ReHU intercept vector. + + Notes + ----- + This method casts the sample weight to the ReHLine parameters by multiplying + the sample weight with the ReLU and ReHU parameters. If sample_weight is None, + then the sample weight is set to the weight parameter C. + """ + + self.auto_shape() + + sample_weight = self.C*sample_weight + + if self.L > 0: + U_weight = self.U * sample_weight + V_weight = self.V * sample_weight + else: + U_weight = self.U + V_weight = self.V + + if self.H > 0: + sqrt_sample_weight = np.sqrt(sample_weight) + Tau_weight = self.Tau * sqrt_sample_weight + S_weight = self.S * sqrt_sample_weight + T_weight = self.T * sqrt_sample_weight + else: + Tau_weight = self.Tau + S_weight = self.S + T_weight = self.T + + return U_weight, V_weight, Tau_weight, S_weight, T_weight + def call_ReLHLoss(self, score): """ Return the value of the ReHLine loss of the `score`. @@ -172,12 +228,22 @@ def _check_rehu(rehu_coef, rehu_intercept, rehu_cut): if len(rehu_coef) > 0: assert (rehu_cut >= 0.0).all(), "`rehu_cut` must be non-negative!" + def ReHLine_solver(X, U, V, Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=(0)), + Lambda=np.empty(shape=(0, 0)), + Gamma=np.empty(shape=(0, 0)), + xi=np.empty(shape=(0, 0)), max_iter=1000, tol=1e-4, shrink=1, verbose=1, trace_freq=100): result = rehline_result() + if len(Lambda)>0: + result.Lambda = Lambda + if len(Gamma)>0: + result.Gamma = Gamma + if len(xi)>0: + result.xi = xi rehline_internal(result, X, A, b, U, V, S, T, Tau, max_iter, tol, shrink, verbose, trace_freq) return result diff --git a/rehline/_class.py b/rehline/_class.py index 55d9fe4..b6bf618 100644 --- a/rehline/_class.py +++ b/rehline/_class.py @@ -48,13 +48,24 @@ class ReHLine(_BaseReHLine, BaseEstimator): The intercept vector in the linear constraint. verbose : int, default=0 - Enable verbose output. Note that this setting takes advantage of a - per-process runtime setting in liblinear that, if enabled, may not work - properly in a multithreaded context. + Enable verbose output. max_iter : int, default=1000 The maximum number of iterations to be run. + tol : float, default=1e-4 + The tolerance for the stopping criterion. + + shrink : float, default=1 + The shrinkage of dual variables for the ReHLine algorithm. + + warm_start : bool, default=False + Whether to use the given dual params as an initial guess for the + optimization algorithm. + + trace_freq : int, default=100 + The frequency at which to print the optimization trace. + Attributes ---------- coef\_ : array-like @@ -72,6 +83,15 @@ class ReHLine(_BaseReHLine, BaseEstimator): primal_obj\_ : array-like The primal objective function values. + Lambda: array-like + The optimized dual variables for ReLU parts. + + Gamma: array-like + The optimized dual variables for ReHU parts. + + xi: array-like + The optimized dual variables for linear constraints. + Examples -------- @@ -109,7 +129,8 @@ def __init__(self, C=1., Tau=np.empty(shape=(0,0)), S=np.empty(shape=(0,0)), T=np.empty(shape=(0,0)), A=np.empty(shape=(0,0)), b=np.empty(shape=(0)), - max_iter=1000, tol=1e-4, shrink=1, verbose=0, trace_freq=100): + max_iter=1000, tol=1e-4, shrink=1, warm_start=0, + verbose=0, trace_freq=100): self.C = C self.U = U self.V = V @@ -124,8 +145,13 @@ def __init__(self, C=1., self.max_iter = max_iter self.tol = tol self.shrink = shrink + self.warm_start = warm_start self.verbose = verbose self.trace_freq = trace_freq + self.Lambda = np.empty(shape=(0, 0)) + self.Gamma = np.empty(shape=(0, 0)) + self.xi = np.empty(shape=(0, 0)) + self.coef_ = None def fit(self, X, sample_weight=None): """Fit the model based on the given training data. @@ -147,41 +173,34 @@ def fit(self, X, sample_weight=None): An instance of the estimator. """ # X = check_array(X) + sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) - - if sample_weight is None: - sample_weight = self.C - else: - sample_weight = self.C*_check_sample_weight(sample_weight, X, dtype=X.dtype) - - if self.L > 0: - U_weight = self.U * sample_weight - V_weight = self.V * sample_weight - else: - U_weight = self.U - V_weight = self.V - - if self.H > 0: - sqrt_sample_weight = np.sqrt(sample_weight) - Tau_weight = self.Tau * sqrt_sample_weight - S_weight = self.S * sqrt_sample_weight - T_weight = self.T * sqrt_sample_weight - else: - Tau_weight = self.Tau - S_weight = self.S - T_weight = self.T + U_weight, V_weight, Tau_weight, S_weight, T_weight = self.cast_sample_weight(sample_weight=sample_weight) + + if not self.warm_start: + ## remove warm_start params + self.Lambda = np.empty(shape=(0, 0)) + self.Gamma = np.empty(shape=(0, 0)) + self.xi = np.empty(shape=(0, 0)) result = ReHLine_solver(X=X, U=U_weight, V=V_weight, Tau=Tau_weight, S=S_weight, T=T_weight, A=self.A, b=self.b, + Lambda=self.Lambda, Gamma=self.Gamma, xi=self.xi, max_iter=self.max_iter, tol=self.tol, shrink=self.shrink, verbose=self.verbose, trace_freq=self.trace_freq) - self.coef_ = result.beta self.opt_result_ = result + # primal solution + self.coef_ = result.beta + # dual solution + self.Lambda = result.Lambda + self.Gamma = result.Gamma + self.xi = result.xi + # algo convergence self.n_iter_ = result.niter self.dual_obj_ = result.dual_objfns self.primal_obj_ = result.primal_objfns @@ -191,6 +210,7 @@ def fit(self, X, sample_weight=None): "ReHLine failed to converge, increase the number of iterations: `max_iter`.", ConvergenceWarning, ) + return self def decision_function(self, X): """The decision function evaluated on the given dataset @@ -304,7 +324,8 @@ def __init__(self, loss, Tau=np.empty(shape=(0,0)), S=np.empty(shape=(0,0)), T=np.empty(shape=(0,0)), A=np.empty(shape=(0,0)), b=np.empty(shape=(0)), - max_iter=1000, tol=1e-4, shrink=1, verbose=0, trace_freq=100): + max_iter=1000, tol=1e-4, shrink=1, warm_start=0, + verbose=0, trace_freq=100): self.loss = loss self.constraint = constraint self.C = C @@ -321,9 +342,13 @@ def __init__(self, loss, self.max_iter = max_iter self.tol = tol self.shrink = shrink + self.warm_start = warm_start self.verbose = verbose self.trace_freq = trace_freq - self.dummy_n = 0 + self.Lambda = np.empty(shape=(0, 0)) + self.Gamma = np.empty(shape=(0, 0)) + self.xi = np.empty(shape=(0, 0)) + self.coef_ = None def fit(self, X, y, sample_weight=None): """Fit the model based on the given training data. @@ -358,40 +383,34 @@ def fit(self, X, y, sample_weight=None): self.A, self.b = _make_constraint_rehline_param(constraint=self.constraint, X=X, y=y) self.auto_shape() - ## sample weight -> rehline params - if sample_weight is None: - sample_weight = self.C - else: - sample_weight = self.C*_check_sample_weight(sample_weight, X, dtype=X.dtype) - - if self.L > 0: - U_weight = self.U * sample_weight - V_weight = self.V * sample_weight - else: - U_weight = self.U - V_weight = self.V - - if self.H > 0: - sqrt_sample_weight = np.sqrt(sample_weight) - Tau_weight = self.Tau * sqrt_sample_weight - S_weight = self.S * sqrt_sample_weight - T_weight = self.T * sqrt_sample_weight - else: - Tau_weight = self.Tau - S_weight = self.S - T_weight = self.T + sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) + + U_weight, V_weight, Tau_weight, S_weight, T_weight = self.cast_sample_weight(sample_weight=sample_weight) + + if not self.warm_start: + ## remove warm_start params + self.Lambda = np.empty(shape=(0, 0)) + self.Gamma = np.empty(shape=(0, 0)) + self.xi = np.empty(shape=(0, 0)) result = ReHLine_solver(X=X, U=U_weight, V=V_weight, Tau=Tau_weight, S=S_weight, T=T_weight, A=self.A, b=self.b, + Lambda=self.Lambda, Gamma=self.Gamma, xi=self.xi, max_iter=self.max_iter, tol=self.tol, shrink=self.shrink, verbose=self.verbose, trace_freq=self.trace_freq) - self.coef_ = result.beta self.opt_result_ = result + # primal solution + self.coef_ = result.beta + # dual solution + self.Lambda = result.Lambda + self.Gamma = result.Gamma + self.xi = result.xi + # algo convergence self.n_iter_ = result.niter self.dual_obj_ = result.dual_objfns self.primal_obj_ = result.primal_objfns @@ -402,6 +421,8 @@ def fit(self, X, y, sample_weight=None): ConvergenceWarning, ) + return self + def decision_function(self, X): """The decision function evaluated on the given dataset diff --git a/tests/_test_svm.py b/tests/_test_svm.py index 07f88cb..80d1fcb 100644 --- a/tests/_test_svm.py +++ b/tests/_test_svm.py @@ -1,5 +1,6 @@ ## Test SVM on simulated dataset import numpy as np + from rehline import ReHLine np.random.seed(1024) @@ -11,6 +12,7 @@ ## solution provided by sklearn from sklearn.svm import LinearSVC + clf = LinearSVC(C=C, loss='hinge', fit_intercept=False, random_state=0, tol=1e-6, max_iter=1000000) clf.fit(X, y) @@ -18,11 +20,6 @@ print('solution privided by liblinear: %s' %sol) -## solution provided by ReHLine -# build-in loss -clf = ReHLine(loss={'name': 'svm'}, C=C) -clf.make_ReLHLoss(X=X, y=y, loss={'name': 'svm'}) -clf.fit(X=X) print('solution privided by rehline: %s' %clf.coef_) print(clf.decision_function([[.1,.2,.3]])) diff --git a/tests/_test_warmstart.py b/tests/_test_warmstart.py new file mode 100644 index 0000000..3317249 --- /dev/null +++ b/tests/_test_warmstart.py @@ -0,0 +1,72 @@ +""" +ReHLine-python: test warmstart +v.0.0.6 - 2024-11-15 +""" +# Authors: Ben Dai + +import numpy as np + +from rehline import ReHLine, plqERM_Ridge +from rehline._base import ReHLine_solver + + +def test_warmstart(): + np.random.seed(1024) + + # Simulate classification dataset + n, d, C = 1000, 3, 0.5 + X = np.random.randn(n, d) + beta0 = np.random.randn(d) + y = np.sign(X.dot(beta0) + np.random.randn(n)) + + # Test `ReHLine_solver` + print("\nTesting ReHLine_solver") + print("------------------------") + + # Test cold start + print("Cold start:") + U = -(C*y).reshape(1,-1) + V = (C*np.array(np.ones(n))).reshape(1,-1) + res = ReHLine_solver(X, U, V) + print(f"Lambda shape: {res.Lambda.shape}") + + # Test warm start + print("\nWarm start:") + res_ws = ReHLine_solver(X, U, V, Lambda=res.Lambda) + print(f"Lambda shape: {res_ws.Lambda.shape}") + + # Test `ReHLine` + print("\nTesting ReHLine") + print("-----------------") + + print("Cold start:") + clf = ReHLine(verbose=1) + clf.C = C + clf.U = -y.reshape(1,-1) + clf.V = np.array(np.ones(n)).reshape(1,-1) + clf.fit(X) + print(f"Coefficients: {clf.coef_}") + + print("\nWarm start:") + clf.C = 2*C + clf.warm_start = 1 + clf.fit(X) + print(f"Coefficients: {clf.coef_}") + + # Test `plqERM_Ridge` + print("\nTesting plqERM_Ridge") + print("---------------------") + + print("Cold start:") + clf = plqERM_Ridge(loss={'name': 'svm'}, C=C, verbose=1) + clf.fit(X=X, y=y) + print(f"Coefficients: {clf.coef_}") + + print("\nWarm start:") + clf.C = 2*C + clf.warm_start = 1 + clf.fit(X=X, y=y) + print(f"Coefficients: {clf.coef_}") + +if __name__ == "__main__": + test_warmstart()