
Commit

Revert "Add generalised MUL and generalised EU as metrics and ensure …
Browse files Browse the repository at this point in the history
…baselines compute them"

This reverts commit c8f824c.
wilrop committed May 4, 2024
1 parent 2c983a3 commit 349ecd5
Showing 10 changed files with 228 additions and 368 deletions.
63 changes: 26 additions & 37 deletions morl_baselines/common/evaluation.py
@@ -1,7 +1,7 @@
"""Utilities related to evaluation."""
import os
import random
from typing import List, Optional, Tuple, Callable
from typing import List, Optional, Tuple

import numpy as np
import torch as th
@@ -16,18 +16,16 @@
igd,
maximum_utility_loss,
sparsity,
generalised_expected_utility,
generalised_maximum_utility_loss
)
from morl_baselines.common.weights import equally_spaced_weights


def eval_mo(
agent,
env,
w: Optional[np.ndarray] = None,
scalarization=np.dot,
render: bool = False,
agent,
env,
w: Optional[np.ndarray] = None,
scalarization=np.dot,
render: bool = False,
) -> Tuple[float, float, np.ndarray, np.ndarray]:
"""Evaluates one episode of the agent in the environment.
@@ -70,11 +68,11 @@ def eval_mo(


def eval_mo_reward_conditioned(
agent,
env,
scalarization=np.dot,
w: Optional[np.ndarray] = None,
render: bool = False,
agent,
env,
scalarization=np.dot,
w: Optional[np.ndarray] = None,
render: bool = False,
) -> Tuple[float, float, np.ndarray, np.ndarray]:
"""Evaluates one episode of the agent in the environment. This makes the assumption that the agent is conditioned on the accrued reward i.e. for ESR agent.
@@ -90,8 +88,7 @@ def eval_mo_reward_conditioned(
"""
obs, _ = env.reset()
done = False
vec_return, disc_vec_return = np.zeros(env.unwrapped.reward_space.shape[0]), np.zeros(
env.unwrapped.reward_space.shape[0])
vec_return, disc_vec_return = np.zeros(env.unwrapped.reward_space.shape[0]), np.zeros(env.unwrapped.reward_space.shape[0])
gamma = 1.0
while not done:
if render:
@@ -118,7 +115,7 @@ def eval_mo_reward_conditioned(


def policy_evaluation_mo(
agent, env, w: np.ndarray, scalarization=np.dot, rep: int = 5
agent, env, w: np.ndarray, scalarization=np.dot, rep: int = 5
) -> Tuple[float, float, np.ndarray, np.ndarray]:
"""Evaluates the value of a policy by running the policy for multiple episodes. Returns the average returns.
@@ -147,13 +144,12 @@ def policy_evaluation_mo(


def log_all_multi_policy_metrics(
current_front: List[np.ndarray],
hv_ref_point: np.ndarray,
reward_dim: int,
global_step: int,
n_sample_weights: int,
utility_fns: Callable,
ref_front: Optional[List[np.ndarray]] = None,
current_front: List[np.ndarray],
hv_ref_point: np.ndarray,
reward_dim: int,
global_step: int,
n_sample_weights: int,
ref_front: Optional[List[np.ndarray]] = None,
):
"""Logs all metrics for multi-policy training.
@@ -177,15 +173,13 @@
hv = hypervolume(hv_ref_point, filtered_front)
sp = sparsity(filtered_front)
eum = expected_utility(filtered_front, weights_set=equally_spaced_weights(reward_dim, n_sample_weights))
geum = generalised_expected_utility(filtered_front, utility_fns)
card = cardinality(filtered_front)

wandb.log(
{
"eval/hypervolume": hv,
"eval/sparsity": sp,
"eval/eum": eum,
"eval/geum": geum,
"eval/cardinality": card,
"global_step": global_step,
},
@@ -205,12 +199,7 @@
reference_set=ref_front,
weights_set=get_reference_directions("energy", reward_dim, n_sample_weights).astype(np.float32),
)
gmul = generalised_maximum_utility_loss(
front=filtered_front,
reference_set=ref_front,
utility_fns=utility_fns
)
wandb.log({"eval/igd": generational_distance, "eval/mul": mul, "eval/gmul": gmul})
wandb.log({"eval/igd": generational_distance, "eval/mul": mul})


def seed_everything(seed: int):
@@ -232,12 +221,12 @@ def seed_everything(seed: int):


def log_episode_info(
info: dict,
scalarization,
weights: Optional[np.ndarray],
global_timestep: int,
id: Optional[int] = None,
verbose: bool = True,
info: dict,
scalarization,
weights: Optional[np.ndarray],
global_timestep: int,
id: Optional[int] = None,
verbose: bool = True,
):
"""Logs information of the last episode from the info dict (automatically filled by the RecordStatisticsWrapper).
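For context, a minimal usage sketch (not part of the diff) of the reverted log_all_multi_policy_metrics, which no longer accepts utility_fns. The fronts, reference point, and step values below are hypothetical, and an active wandb run is assumed since the function logs via wandb.log internally:

import numpy as np

from morl_baselines.common.evaluation import log_all_multi_policy_metrics

# Hypothetical 2-objective front discovered by a baseline.
current_front = [np.array([1.0, 0.0]), np.array([0.6, 0.6]), np.array([0.0, 1.0])]
# Hypothetical known optimal front; passing it enables the IGD and MUL metrics.
ref_front = [np.array([1.0, 0.0]), np.array([0.7, 0.7]), np.array([0.0, 1.0])]

log_all_multi_policy_metrics(
    current_front=current_front,
    hv_ref_point=np.array([0.0, 0.0]),  # reference point for the hypervolume computation
    reward_dim=2,
    global_step=10_000,
    n_sample_weights=100,
    ref_front=ref_front,
)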
70 changes: 0 additions & 70 deletions morl_baselines/common/monotonic_utility.py

This file was deleted.
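The contents of the deleted morl_baselines/common/monotonic_utility.py are not shown in this diff. Purely to illustrate the interface that the removed call sites later in this revert rely on (an object built from per-objective value bounds that is callable on a batch of return vectors and returns a torch tensor of utilities), here is a hypothetical stand-in; it is not the original UtilityFunction implementation:

import numpy as np
import torch


class RandomLinearUtility:
    """Hypothetical stand-in, not the deleted UtilityFunction: a frozen, monotone
    utility that normalises return vectors to [0, 1] and applies fixed non-negative
    weights capped at max_weight."""

    def __init__(self, min_val: np.ndarray, max_val: np.ndarray, max_weight: float = 0.1, seed: int = 0):
        rng = np.random.default_rng(seed)
        self.min_val = torch.as_tensor(min_val, dtype=torch.float32)
        self.max_val = torch.as_tensor(max_val, dtype=torch.float32)
        # Non-negative weights keep the utility monotonically increasing in every objective.
        self.weights = torch.as_tensor(rng.uniform(0.0, max_weight, size=len(min_val)), dtype=torch.float32)

    def __call__(self, points) -> torch.Tensor:
        x = torch.as_tensor(np.stack(points), dtype=torch.float32)
        x = (x - self.min_val) / (self.max_val - self.min_val)  # normalise to [0, 1]
        return x @ self.weights  # one utility value per return vector


# Example: three stand-in utility functions for a 2-objective problem.
utility_fns = [RandomLinearUtility(np.zeros(2), np.ones(2), seed=s) for s in range(3)]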

58 changes: 21 additions & 37 deletions morl_baselines/common/morl_algorithm.py
@@ -13,7 +13,6 @@
from gymnasium import spaces
from mo_gymnasium.utils import MOSyncVectorEnv

from morl_baselines.common.monotonic_utility import UtilityFunction
from morl_baselines.common.evaluation import (
eval_mo_reward_conditioned,
policy_evaluation_mo,
@@ -55,11 +54,11 @@ def eval(self, obs: np.ndarray, w: Optional[np.ndarray]) -> Union[int, np.ndarray]:
"""

def __report(
self,
scalarized_return,
scalarized_discounted_return,
vec_return,
discounted_vec_return,
self,
scalarized_return,
scalarized_discounted_return,
vec_return,
discounted_vec_return,
):
"""Writes the data to wandb summary."""
if self.id is None:
@@ -80,12 +79,12 @@ def __report(
)

def policy_eval(
self,
eval_env,
num_episodes: int = 5,
scalarization=np.dot,
weights: Optional[np.ndarray] = None,
log: bool = False,
self,
eval_env,
num_episodes: int = 5,
scalarization=np.dot,
weights: Optional[np.ndarray] = None,
log: bool = False,
):
"""Runs a policy evaluation (typically over a few episodes) on eval_env and logs some metrics if asked.
@@ -117,11 +116,11 @@ def policy_eval(
return scalarized_return, scalarized_discounted_return, vec_return, discounted_vec_return

def policy_eval_esr(
self,
eval_env,
scalarization,
weights: Optional[np.ndarray] = None,
log: bool = False,
self,
eval_env,
scalarization,
weights: Optional[np.ndarray] = None,
log: bool = False,
):
"""Runs a policy evaluation (typically on one episode) on eval_env and logs some metrics if asked.
@@ -183,15 +182,7 @@ def update(self) -> None:
class MOAgent(ABC):
"""An MORL Agent, can contain one or multiple MOPolicies. Contains helpers to extract features from the environment, setup logging etc."""

def __init__(
self,
env: Optional[gym.Env],
num_utility_fns: int = 100,
min_val: Optional[np.ndarray] = None,
max_val: Optional[np.ndarray] = None,
device: Union[th.device, str] = "auto",
seed: Optional[int] = None
) -> None:
def __init__(self, env: Optional[gym.Env], device: Union[th.device, str] = "auto", seed: Optional[int] = None) -> None:
"""Initializes the agent.
Args:
@@ -206,16 +197,6 @@ def __init__(
self.num_episodes = 0
self.seed = seed
self.np_random = np.random.default_rng(self.seed)
self.min_val = min_val if min_val is not None else np.zeros(self.reward_dim)
self.max_val = max_val if max_val is not None else np.ones(self.reward_dim)
self.utility_fns = [UtilityFunction( # These are used in evaluation
self.min_val,
self.max_val,
frozen=True,
normalise=True,
max_weight=0.1,
size_factor=1)
]

def extract_env_info(self, env: Optional[gym.Env]) -> None:
"""Extracts all the features of the environment: observation space, action space, ...
@@ -261,7 +242,7 @@ def register_additional_config(self, conf: Dict = {}) -> None:
wandb.config[key] = value

def setup_wandb(
self, project_name: str, experiment_name: str, entity: Optional[str] = None, group: Optional[str] = None
self, project_name: str, experiment_name: str, entity: Optional[str] = None, group: Optional[str] = None
) -> None:
"""Initializes the wandb writer.
@@ -288,6 +269,9 @@ def setup_wandb(
entity=entity,
config=config,
name=self.full_experiment_name,
monitor_gym=monitor_gym,
save_code=True,
group=group,
)
# The default "step" of wandb is not the actual time step (global_step) of the MDP
wandb.define_metric("*", step_metric="global_step")
18 changes: 0 additions & 18 deletions morl_baselines/common/performance_indicators.py
@@ -5,7 +5,6 @@
from copy import deepcopy
from typing import Callable, List

import torch
import numpy as np
import numpy.typing as npt
from pymoo.indicators.hv import HV
@@ -123,20 +122,3 @@ def maximum_utility_loss(
max_scalarized_values = [np.max([utility(weight, point) for point in front]) for weight in weights_set]
utility_losses = [max_scalarized_values_ref[i] - max_scalarized_values[i] for i in range(len(max_scalarized_values))]
return np.max(utility_losses)


def generalised_maximum_utility_loss(front, reference_set, utility_fns):
"""Compute the maximum utility loss for a front and utility functions wrt a reference set."""
utility_losses = []
for utility_fn in utility_fns:
front_utilities = utility_fn(reference_set) # Compute the utility for the front
approx_utilities = utility_fn(front) # Compute the utility for the approximate front
max_utility_loss = torch.max(front_utilities) - torch.max(approx_utilities) # Compute the utility loss.
utility_losses.append(max_utility_loss)
return torch.max(torch.stack(utility_losses))


def generalised_expected_utility(front, utility_fns):
"""Compute the expected utility for the set of utility functions when taking vectors from the front."""
utilities = [torch.max(utility_fn(front)) for utility_fn in utility_fns]
return torch.mean(torch.stack(utilities))
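To make concrete what the revert removes, the sketch below copies the two deleted functions (lightly condensed from the hunk above) and evaluates them on toy data. The fronts and the linear utility functions are hypothetical stand-ins for the deleted monotonic UtilityFunction objects:

import numpy as np
import torch


def generalised_maximum_utility_loss(front, reference_set, utility_fns):
    """Copied from the removed hunk: worst-case utility gap of the front wrt the reference set."""
    utility_losses = []
    for utility_fn in utility_fns:
        front_utilities = utility_fn(reference_set)  # utilities achievable on the reference front
        approx_utilities = utility_fn(front)         # utilities achievable on the approximate front
        utility_losses.append(torch.max(front_utilities) - torch.max(approx_utilities))
    return torch.max(torch.stack(utility_losses))


def generalised_expected_utility(front, utility_fns):
    """Copied from the removed hunk: mean over utility functions of the best utility on the front."""
    utilities = [torch.max(utility_fn(front)) for utility_fn in utility_fns]
    return torch.mean(torch.stack(utilities))


# Hypothetical linear utilities standing in for the deleted UtilityFunction objects:
# each maps a batch of return vectors to a 1-D tensor of utilities.
weights = [np.array([0.8, 0.2]), np.array([0.5, 0.5]), np.array([0.2, 0.8])]
utility_fns = [
    lambda points, w=w: torch.as_tensor(np.stack(points), dtype=torch.float32)
    @ torch.as_tensor(w, dtype=torch.float32)
    for w in weights
]

front = [np.array([1.0, 0.0]), np.array([0.6, 0.6])]                                # approximate front
reference_set = [np.array([1.0, 0.0]), np.array([0.7, 0.7]), np.array([0.0, 1.0])]  # optimal front

print(float(generalised_expected_utility(front, utility_fns)))                     # ~0.67 for these toy fronts
print(float(generalised_maximum_utility_loss(front, reference_set, utility_fns)))  # ~0.2 for these toy fronts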
