
Commit

Revert "Add generalised MUL and generalised EU as metrics and ensure …
Browse files Browse the repository at this point in the history
…baselines compute them"

This reverts commit c8f824c.
wilrop committed May 4, 2024
1 parent 2c983a3 commit 349ecd5
Showing 10 changed files with 228 additions and 368 deletions.
63 changes: 26 additions & 37 deletions morl_baselines/common/evaluation.py
@@ -1,7 +1,7 @@
"""Utilities related to evaluation."""
import os
import random
from typing import List, Optional, Tuple, Callable
from typing import List, Optional, Tuple

import numpy as np
import torch as th
@@ -16,18 +16,16 @@
igd,
maximum_utility_loss,
sparsity,
generalised_expected_utility,
generalised_maximum_utility_loss
)
from morl_baselines.common.weights import equally_spaced_weights


def eval_mo(
agent,
env,
w: Optional[np.ndarray] = None,
scalarization=np.dot,
render: bool = False,
agent,
env,
w: Optional[np.ndarray] = None,
scalarization=np.dot,
render: bool = False,
) -> Tuple[float, float, np.ndarray, np.ndarray]:
"""Evaluates one episode of the agent in the environment.
@@ -70,11 +68,11 @@ def eval_mo(


def eval_mo_reward_conditioned(
agent,
env,
scalarization=np.dot,
w: Optional[np.ndarray] = None,
render: bool = False,
agent,
env,
scalarization=np.dot,
w: Optional[np.ndarray] = None,
render: bool = False,
) -> Tuple[float, float, np.ndarray, np.ndarray]:
"""Evaluates one episode of the agent in the environment. This makes the assumption that the agent is conditioned on the accrued reward i.e. for ESR agent.
@@ -90,8 +88,7 @@ def eval_mo_reward_conditioned(
"""
obs, _ = env.reset()
done = False
vec_return, disc_vec_return = np.zeros(env.unwrapped.reward_space.shape[0]), np.zeros(
env.unwrapped.reward_space.shape[0])
vec_return, disc_vec_return = np.zeros(env.unwrapped.reward_space.shape[0]), np.zeros(env.unwrapped.reward_space.shape[0])
gamma = 1.0
while not done:
if render:
@@ -118,7 +115,7 @@ def eval_mo_reward_conditioned(


def policy_evaluation_mo(
agent, env, w: np.ndarray, scalarization=np.dot, rep: int = 5
agent, env, w: np.ndarray, scalarization=np.dot, rep: int = 5
) -> Tuple[float, float, np.ndarray, np.ndarray]:
"""Evaluates the value of a policy by running the policy for multiple episodes. Returns the average returns.
@@ -147,13 +144,12 @@ def policy_evaluation_mo(


def log_all_multi_policy_metrics(
current_front: List[np.ndarray],
hv_ref_point: np.ndarray,
reward_dim: int,
global_step: int,
n_sample_weights: int,
utility_fns: Callable,
ref_front: Optional[List[np.ndarray]] = None,
current_front: List[np.ndarray],
hv_ref_point: np.ndarray,
reward_dim: int,
global_step: int,
n_sample_weights: int,
ref_front: Optional[List[np.ndarray]] = None,
):
"""Logs all metrics for multi-policy training.
@@ -177,15 +173,13 @@
hv = hypervolume(hv_ref_point, filtered_front)
sp = sparsity(filtered_front)
eum = expected_utility(filtered_front, weights_set=equally_spaced_weights(reward_dim, n_sample_weights))
geum = generalised_expected_utility(filtered_front, utility_fns)
card = cardinality(filtered_front)

wandb.log(
{
"eval/hypervolume": hv,
"eval/sparsity": sp,
"eval/eum": eum,
"eval/geum": geum,
"eval/cardinality": card,
"global_step": global_step,
},
@@ -205,12 +199,7 @@
reference_set=ref_front,
weights_set=get_reference_directions("energy", reward_dim, n_sample_weights).astype(np.float32),
)
gmul = generalised_maximum_utility_loss(
front=filtered_front,
reference_set=ref_front,
utility_fns=utility_fns
)
wandb.log({"eval/igd": generational_distance, "eval/mul": mul, "eval/gmul": gmul})
wandb.log({"eval/igd": generational_distance, "eval/mul": mul})


def seed_everything(seed: int):
@@ -232,12 +221,12 @@ def seed_everything(seed: int):


def log_episode_info(
info: dict,
scalarization,
weights: Optional[np.ndarray],
global_timestep: int,
id: Optional[int] = None,
verbose: bool = True,
info: dict,
scalarization,
weights: Optional[np.ndarray],
global_timestep: int,
id: Optional[int] = None,
verbose: bool = True,
):
"""Logs information of the last episode from the info dict (automatically filled by the RecordStatisticsWrapper).
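For context, a minimal usage sketch (not part of the diff) of the reverted log_all_multi_policy_metrics, which no longer accepts utility_fns. The fronts, reference point, and step values below are hypothetical, and an active wandb run is assumed since the function logs via wandb.log internally:

import numpy as np

from morl_baselines.common.evaluation import log_all_multi_policy_metrics

# Hypothetical 2-objective front discovered by a baseline.
current_front = [np.array([1.0, 0.0]), np.array([0.6, 0.6]), np.array([0.0, 1.0])]
# Hypothetical known optimal front; passing it enables the IGD and MUL metrics.
ref_front = [np.array([1.0, 0.0]), np.array([0.7, 0.7]), np.array([0.0, 1.0])]

log_all_multi_policy_metrics(
    current_front=current_front,
    hv_ref_point=np.array([0.0, 0.0]),  # reference point for the hypervolume computation
    reward_dim=2,
    global_step=10_000,
    n_sample_weights=100,
    ref_front=ref_front,
)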
70 changes: 0 additions & 70 deletions morl_baselines/common/monotonic_utility.py

This file was deleted.
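The contents of the deleted morl_baselines/common/monotonic_utility.py are not shown in this diff. Purely to illustrate the interface that the removed call sites later in this revert rely on (an object built from per-objective value bounds that is callable on a batch of return vectors and returns a torch tensor of utilities), here is a hypothetical stand-in; it is not the original UtilityFunction implementation:

import numpy as np
import torch


class RandomLinearUtility:
    """Hypothetical stand-in, not the deleted UtilityFunction: a frozen, monotone
    utility that normalises return vectors to [0, 1] and applies fixed non-negative
    weights capped at max_weight."""

    def __init__(self, min_val: np.ndarray, max_val: np.ndarray, max_weight: float = 0.1, seed: int = 0):
        rng = np.random.default_rng(seed)
        self.min_val = torch.as_tensor(min_val, dtype=torch.float32)
        self.max_val = torch.as_tensor(max_val, dtype=torch.float32)
        # Non-negative weights keep the utility monotonically increasing in every objective.
        self.weights = torch.as_tensor(rng.uniform(0.0, max_weight, size=len(min_val)), dtype=torch.float32)

    def __call__(self, points) -> torch.Tensor:
        x = torch.as_tensor(np.stack(points), dtype=torch.float32)
        x = (x - self.min_val) / (self.max_val - self.min_val)  # normalise to [0, 1]
        return x @ self.weights  # one utility value per return vector


# Example: three stand-in utility functions for a 2-objective problem.
utility_fns = [RandomLinearUtility(np.zeros(2), np.ones(2), seed=s) for s in range(3)]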

58 changes: 21 additions & 37 deletions morl_baselines/common/morl_algorithm.py
@@ -13,7 +13,6 @@
from gymnasium import spaces
from mo_gymnasium.utils import MOSyncVectorEnv

from morl_baselines.common.monotonic_utility import UtilityFunction
from morl_baselines.common.evaluation import (
eval_mo_reward_conditioned,
policy_evaluation_mo,
@@ -55,11 +54,11 @@ def eval(self, obs: np.ndarray, w: Optional[np.ndarray]) -> Union[int, np.ndarray]:
"""

def __report(
self,
scalarized_return,
scalarized_discounted_return,
vec_return,
discounted_vec_return,
self,
scalarized_return,
scalarized_discounted_return,
vec_return,
discounted_vec_return,
):
"""Writes the data to wandb summary."""
if self.id is None:
@@ -80,12 +79,12 @@ def __report(
)

def policy_eval(
self,
eval_env,
num_episodes: int = 5,
scalarization=np.dot,
weights: Optional[np.ndarray] = None,
log: bool = False,
self,
eval_env,
num_episodes: int = 5,
scalarization=np.dot,
weights: Optional[np.ndarray] = None,
log: bool = False,
):
"""Runs a policy evaluation (typically over a few episodes) on eval_env and logs some metrics if asked.
@@ -117,11 +116,11 @@ def policy_eval(
return scalarized_return, scalarized_discounted_return, vec_return, discounted_vec_return

def policy_eval_esr(
self,
eval_env,
scalarization,
weights: Optional[np.ndarray] = None,
log: bool = False,
self,
eval_env,
scalarization,
weights: Optional[np.ndarray] = None,
log: bool = False,
):
"""Runs a policy evaluation (typically on one episode) on eval_env and logs some metrics if asked.
@@ -183,15 +182,7 @@ def update(self) -> None:
class MOAgent(ABC):
"""An MORL Agent, can contain one or multiple MOPolicies. Contains helpers to extract features from the environment, setup logging etc."""

def __init__(
self,
env: Optional[gym.Env],
num_utility_fns: int = 100,
min_val: Optional[np.ndarray] = None,
max_val: Optional[np.ndarray] = None,
device: Union[th.device, str] = "auto",
seed: Optional[int] = None
) -> None:
def __init__(self, env: Optional[gym.Env], device: Union[th.device, str] = "auto", seed: Optional[int] = None) -> None:
"""Initializes the agent.
Args:
@@ -206,16 +197,6 @@ def __init__(
self.num_episodes = 0
self.seed = seed
self.np_random = np.random.default_rng(self.seed)
self.min_val = min_val if min_val is not None else np.zeros(self.reward_dim)
self.max_val = max_val if max_val is not None else np.ones(self.reward_dim)
self.utility_fns = [UtilityFunction( # These are used in evaluation
self.min_val,
self.max_val,
frozen=True,
normalise=True,
max_weight=0.1,
size_factor=1)
]

def extract_env_info(self, env: Optional[gym.Env]) -> None:
"""Extracts all the features of the environment: observation space, action space, ...
@@ -261,7 +242,7 @@ def register_additional_config(self, conf: Dict = {}) -> None:
wandb.config[key] = value

def setup_wandb(
self, project_name: str, experiment_name: str, entity: Optional[str] = None, group: Optional[str] = None
self, project_name: str, experiment_name: str, entity: Optional[str] = None, group: Optional[str] = None
) -> None:
"""Initializes the wandb writer.
@@ -288,6 +269,9 @@ def setup_wandb(
entity=entity,
config=config,
name=self.full_experiment_name,
monitor_gym=monitor_gym,
save_code=True,
group=group,
)
# The default "step" of wandb is not the actual time step (global_step) of the MDP
wandb.define_metric("*", step_metric="global_step")
18 changes: 0 additions & 18 deletions morl_baselines/common/performance_indicators.py
@@ -5,7 +5,6 @@
from copy import deepcopy
from typing import Callable, List

import torch
import numpy as np
import numpy.typing as npt
from pymoo.indicators.hv import HV
@@ -123,20 +122,3 @@ def maximum_utility_loss(
max_scalarized_values = [np.max([utility(weight, point) for point in front]) for weight in weights_set]
utility_losses = [max_scalarized_values_ref[i] - max_scalarized_values[i] for i in range(len(max_scalarized_values))]
return np.max(utility_losses)


def generalised_maximum_utility_loss(front, reference_set, utility_fns):
"""Compute the maximum utility loss for a front and utility functions wrt a reference set."""
utility_losses = []
for utility_fn in utility_fns:
front_utilities = utility_fn(reference_set) # Compute the utility for the front
approx_utilities = utility_fn(front) # Compute the utility for the approximate front
max_utility_loss = torch.max(front_utilities) - torch.max(approx_utilities) # Compute the utility loss.
utility_losses.append(max_utility_loss)
return torch.max(torch.stack(utility_losses))


def generalised_expected_utility(front, utility_fns):
"""Compute the expected utility for the set of utility functions when taking vectors from the front."""
utilities = [torch.max(utility_fn(front)) for utility_fn in utility_fns]
return torch.mean(torch.stack(utilities))
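To make concrete what the revert removes, the sketch below copies the two deleted functions (lightly condensed from the hunk above) and evaluates them on toy data. The fronts and the linear utility functions are hypothetical stand-ins for the deleted monotonic UtilityFunction objects:

import numpy as np
import torch


def generalised_maximum_utility_loss(front, reference_set, utility_fns):
    """Copied from the removed hunk: worst-case utility gap of the front wrt the reference set."""
    utility_losses = []
    for utility_fn in utility_fns:
        front_utilities = utility_fn(reference_set)  # utilities achievable on the reference front
        approx_utilities = utility_fn(front)         # utilities achievable on the approximate front
        utility_losses.append(torch.max(front_utilities) - torch.max(approx_utilities))
    return torch.max(torch.stack(utility_losses))


def generalised_expected_utility(front, utility_fns):
    """Copied from the removed hunk: mean over utility functions of the best utility on the front."""
    utilities = [torch.max(utility_fn(front)) for utility_fn in utility_fns]
    return torch.mean(torch.stack(utilities))


# Hypothetical linear utilities standing in for the deleted UtilityFunction objects:
# each maps a batch of return vectors to a 1-D tensor of utilities.
weights = [np.array([0.8, 0.2]), np.array([0.5, 0.5]), np.array([0.2, 0.8])]
utility_fns = [
    lambda points, w=w: torch.as_tensor(np.stack(points), dtype=torch.float32)
    @ torch.as_tensor(w, dtype=torch.float32)
    for w in weights
]

front = [np.array([1.0, 0.0]), np.array([0.6, 0.6])]                                # approximate front
reference_set = [np.array([1.0, 0.0]), np.array([0.7, 0.7]), np.array([0.0, 1.0])]  # optimal front

print(float(generalised_expected_utility(front, utility_fns)))                     # ~0.67 for these toy fronts
print(float(generalised_maximum_utility_loss(front, reference_set, utility_fns)))  # ~0.2 for these toy fronts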
