fix(pu): fix device bug in sampled efficientzero

opendilab · Dec 12, 2023 · 9b00a00
1 parent 784a1c2
commit 9b00a00
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/lzero/policy/sampled_efficientzero.py b/lzero/policy/sampled_efficientzero.py
@@ -650,9 +650,9 @@ def _calculate_policy_loss_cont(
             y = 1 - target_sampled_actions[:, k, :].pow(2)
 
             # NOTE: for numerical stability.
-            target_sampled_actions_clamped = torch.clamp(
-                target_sampled_actions[:, k, :], torch.tensor(-1 + 1e-6), torch.tensor(1 - 1e-6)
-            )
+            min_val = torch.tensor(-1 + 1e-6).to(target_sampled_actions.device)
+            max_val = torch.tensor(1 - 1e-6).to(target_sampled_actions.device)
+            target_sampled_actions_clamped = torch.clamp(target_sampled_actions[:, k, :], min_val, max_val)
             target_sampled_actions_before_tanh = torch.arctanh(target_sampled_actions_clamped)
 
             # keep dimension for loss computation (usually for action space is 1 env. e.g. pendulum)