diff --git a/torch_frame/gbdt/tuned_xgboost.py b/torch_frame/gbdt/tuned_xgboost.py
index 9b939d32..562c53f9 100644
--- a/torch_frame/gbdt/tuned_xgboost.py
+++ b/torch_frame/gbdt/tuned_xgboost.py
@@ -88,12 +88,12 @@ def _to_xgboost_input(
         return feat, y, types

     def objective(
-        self,
-        trial: Any,  # optuna.trial.Trial
-        dtrain: Any,  # xgboost.DMatrix
-        dvalid: Any,  # xgboost.DMatrix
-        num_boost_round: int,
-    ) -> float:
+            self,
+            trial: Any,  # optuna.trial.Trial
+            dtrain: Any,  # xgboost.DMatrix
+            dvalid: Any,  # xgboost.DMatrix
+            num_boost_round: int,
+            device: str = 'cpu') -> float:
         r"""Objective function to be optimized.

         Args:
@@ -101,6 +101,9 @@ def objective(
             dtrain (xgboost.DMatrix): Train data.
             dvalid (xgboost.DMatrix): Validation data.
             num_boost_round (int): Number of boosting round.
+            device (str): The device for XGBoost to train on. One of `cpu`,
+                `cuda`, `cuda:<ordinal>`, `gpu`, `gpu:<ordinal>`. See XGBoost
+                documentation for details.

         Returns:
             float: Best objective value. Root mean squared error for
@@ -118,8 +121,13 @@ def objective(
                 else trial.suggest_float('lambda', 1e-8, 1e2, log=True)),
             "alpha":
             (0.0 if not trial.suggest_categorical('use_alpha', [True, False])
-             else trial.suggest_float('alpha', 1e-8, 1e2, log=True))
+             else trial.suggest_float('alpha', 1e-8, 1e2, log=True)),
+            "device":
+            device
         }
+        if device.startswith("gpu") or device.startswith("cuda"):
+            self.params["tree_method"] = "hist"
+
         if self.params["booster"] == "gbtree" or self.params[
                 "booster"] == "dart":
             self.params["max_depth"] = trial.suggest_int("max_depth", 3, 11)
@@ -182,13 +190,9 @@ def objective(
                 torch.from_numpy(pred))
         return score

-    def _tune(
-        self,
-        tf_train: TensorFrame,
-        tf_val: TensorFrame,
-        num_trials: int,
-        num_boost_round: int = 2000,
-    ):
+    def _tune(self, tf_train: TensorFrame, tf_val: TensorFrame,
+              num_trials: int, num_boost_round: int = 2000,
+              device: str = 'cpu'):
         import optuna
         import xgboost

@@ -207,8 +211,8 @@ def _tune(
                                  feature_types=val_feat_type,
                                  enable_categorical=True)
         study.optimize(
-            lambda trial: self.objective(trial, dtrain, dvalid, num_boost_round
-                                         ), num_trials)
+            lambda trial: self.objective(trial, dtrain, dvalid,
+                                         num_boost_round, device), num_trials)
         self.params.update(study.best_params)

         self.model = xgboost.train(self.params, dtrain,
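
For context, here is a usage sketch of what this patch enables. It is not part of the diff itself: it assumes the public `tune()` wrapper on the `GBDT` base class forwards extra keyword arguments down to `_tune()`, and the `train_tensor_frame` / `val_tensor_frame` / `test_tensor_frame` names are placeholder `TensorFrame` splits prepared elsewhere, not objects defined here.

```python
from torch_frame import TaskType
from torch_frame.gbdt import XGBoost

# Placeholder splits: train_tensor_frame, val_tensor_frame and
# test_tensor_frame are assumed to be TensorFrame objects materialized
# from a dataset beforehand.
gbdt = XGBoost(task_type=TaskType.REGRESSION)

# With this patch applied, every Optuna trial and the final xgboost.train()
# call receive params["device"] = "cuda", and tree_method is pinned to
# "hist" so training runs on the GPU histogram updater.
gbdt.tune(tf_train=train_tensor_frame, tf_val=val_tensor_frame,
          num_trials=20, device='cuda')

pred = gbdt.predict(tf_test=test_tensor_frame)
```

Pinning `tree_method` to `"hist"` whenever a GPU device is requested matches XGBoost 2.0's device handling, where the deprecated `gpu_hist` tree method was replaced by `device="cuda"` combined with the `hist` tree method.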