diff --git a/morl_baselines/multi_policy/pcn/pcn.py b/morl_baselines/multi_policy/pcn/pcn.py index a3fd687a..73d7b8c5 100644 --- a/morl_baselines/multi_policy/pcn/pcn.py +++ b/morl_baselines/multi_policy/pcn/pcn.py @@ -365,7 +365,7 @@ def evaluate(self, env, max_return, n=10): horizons = np.float32(horizons) e_returns = [] for i in range(n): - transitions = self._run_episode(env, returns[i], np.float32(horizons[i] - 2), max_return, eval_mode=True) + transitions = self._run_episode(env, returns[i], np.float32(horizons[i]), max_return, eval_mode=True) # compute return for i in reversed(range(len(transitions) - 1)): transitions[i].reward += self.gamma * transitions[i + 1].reward