From de6e419a7a99d1f3010b97f3579e9338277a142b Mon Sep 17 00:00:00 2001 From: Abhishek Sriraman Date: Mon, 16 Dec 2024 14:48:58 -0800 Subject: [PATCH 1/8] Switch to gymnasium in favor of openai gym. Gymnasium's gym is a drop-in replacement of open-ai's gym. Some of these gymnasium modules were already being used in this repo as it depends on PettingZoo. --- setup.py | 2 +- .../imitation/behavior_cloning_tf2.py | 6 +++--- src/human_aware_rl/rllib/rllib.py | 18 +++++++++--------- src/overcooked_ai_py/__init__.py | 2 +- src/overcooked_ai_py/mdp/overcooked_env.py | 17 ++++++++--------- testing/overcooked_test.py | 6 +++--- 6 files changed, 25 insertions(+), 26 deletions(-) diff --git a/setup.py b/setup.py index 726c4898..89c44b38 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ "numpy", "scipy", "tqdm", - "gym", + "gymnasium", "pettingzoo", "ipython", "pygame", diff --git a/src/human_aware_rl/imitation/behavior_cloning_tf2.py b/src/human_aware_rl/imitation/behavior_cloning_tf2.py index ae43c162..54dd7517 100644 --- a/src/human_aware_rl/imitation/behavior_cloning_tf2.py +++ b/src/human_aware_rl/imitation/behavior_cloning_tf2.py @@ -474,8 +474,8 @@ def __init__(self, observation_space, action_space, config): """ RLLib compatible constructor for initializing a behavior cloning model - observation_space (gym.Space|tuple) Shape of the featurized observations - action_space (gym.space|tuple) Shape of the action space (len(Action.All_ACTIONS),) + observation_space (gymnasium.Space|tuple) Shape of the featurized observations + action_space (gymnasium.space|tuple) Shape of the action space (len(Action.All_ACTIONS),) config (dict) Dictionary of relavant bc params - model_dir (str) Path to pickled keras.Model used to map observations to action logits - stochastic (bool) Whether action should return logit argmax or sample over distribution @@ -519,7 +519,7 @@ def __init__(self, observation_space, action_space, config): self.context = self._create_execution_context() def _setup_shapes(self): - # This is here to make the class compatible with both tuples or gym.Space objs for the spaces + # This is here to make the class compatible with both tuples or gymnasium.Space objs for the spaces # Note: action_space = (len(Action.ALL_ACTIONS,)) is technically NOT the action space shape, which would be () since actions are scalars self.observation_shape = ( self.observation_space diff --git a/src/human_aware_rl/rllib/rllib.py b/src/human_aware_rl/rllib/rllib.py index 162530bf..f54fe345 100644 --- a/src/human_aware_rl/rllib/rllib.py +++ b/src/human_aware_rl/rllib/rllib.py @@ -6,7 +6,7 @@ from datetime import datetime import dill -import gym +import gymnasium import numpy as np import ray from ray.rllib.agents.ppo import PPOTrainer @@ -32,8 +32,8 @@ OvercookedGridworld, ) -action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) -obs_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) +action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) +obs_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S") @@ -218,9 +218,9 @@ def _validate_schedule(self, schedule): def _setup_action_space(self, agents): action_sp = {} for agent in agents: - action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) - self.action_space = gym.spaces.Dict(action_sp) - self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) + action_sp[agent] = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) + self.action_space = 
gymnasium.spaces.Dict(action_sp) + self.shared_action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) def _setup_observation_space(self, agents): dummy_state = self.base_env.mdp.get_standard_start_state() @@ -232,7 +232,7 @@ def _setup_observation_space(self, agents): high = np.ones(obs_shape) * float("inf") low = np.ones(obs_shape) * 0 - self.ppo_observation_space = gym.spaces.Box( + self.ppo_observation_space = gymnasium.spaces.Box( np.float32(low), np.float32(high), dtype=np.float32 ) @@ -243,7 +243,7 @@ def _setup_observation_space(self, agents): obs_shape = featurize_fn_bc(dummy_state)[0].shape high = np.ones(obs_shape) * 100 low = np.ones(obs_shape) * -100 - self.bc_observation_space = gym.spaces.Box( + self.bc_observation_space = gymnasium.spaces.Box( np.float32(low), np.float32(high), dtype=np.float32 ) # hardcode mapping between action space and agent @@ -253,7 +253,7 @@ def _setup_observation_space(self, agents): ob_space[agent] = self.ppo_observation_space else: ob_space[agent] = self.bc_observation_space - self.observation_space = gym.spaces.Dict(ob_space) + self.observation_space = gymnasium.spaces.Dict(ob_space) def _get_featurize_fn(self, agent_id): if agent_id.startswith("ppo"): diff --git a/src/overcooked_ai_py/__init__.py b/src/overcooked_ai_py/__init__.py index 42a19935..dd0a830f 100644 --- a/src/overcooked_ai_py/__init__.py +++ b/src/overcooked_ai_py/__init__.py @@ -1,4 +1,4 @@ -from gym.envs.registration import register +from gymnasium.envs.registration import register register( id="Overcooked-v0", diff --git a/src/overcooked_ai_py/mdp/overcooked_env.py b/src/overcooked_ai_py/mdp/overcooked_env.py index b61c0ca6..a4fe2813 100644 --- a/src/overcooked_ai_py/mdp/overcooked_env.py +++ b/src/overcooked_ai_py/mdp/overcooked_env.py @@ -2,7 +2,6 @@ import time import cv2 -import gym import gymnasium import numpy as np import pygame @@ -715,8 +714,8 @@ def observation_space(self, agent): dummy_mdp = self.base_env.mdp dummy_state = dummy_mdp.get_standard_start_state() obs_shape = agent.featurize(dummy_state)[0].shape - high = np.ones(obs_shape) * float("inf") - low = np.zeros(obs_shape) + high = np.ones(obs_shape, dtype=np.float32) * float("inf") + low = np.zeros(obs_shape, dtype=np.float32) return gymnasium.spaces.Box(low, high, dtype=np.float32) # we want to return the same space object every time @@ -780,7 +779,7 @@ def render(self, mode="human", close=False): pass -class Overcooked(gym.Env): +class Overcooked(gymnasium.Env): """ Wrapper for the Env class above that is SOMEWHAT compatible with the standard gym API. Why only somewhat? Because we need to flatten a multi-agent env to be a single-agent env (as gym requires). 
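For orientation, the migration in the hunks above is mechanical: every `gym.spaces` constructor used by `rllib.py` and `overcooked_env.py` has a same-named `gymnasium.spaces` counterpart with the same signature. Below is a minimal sketch of the migrated space definitions; the action list, agent ids, and observation shape are illustrative stand-ins, not values taken from the diff.

```python
import gymnasium
import numpy as np

ALL_ACTIONS = ["N", "S", "E", "W", "stay", "interact"]  # stand-in for Action.ALL_ACTIONS
obs_shape = (96,)  # hypothetical featurized-observation shape

# Per-agent discrete action space and the joint Dict space, as in _setup_action_space above
action_space = gymnasium.spaces.Discrete(len(ALL_ACTIONS))
joint_action_space = gymnasium.spaces.Dict(
    {"ppo_0": action_space, "ppo_1": action_space}  # hypothetical agent ids
)

# Observation Box bounded below by 0 and unbounded above, as in _setup_observation_space above
low = np.zeros(obs_shape, dtype=np.float32)
high = np.full(obs_shape, np.inf, dtype=np.float32)
observation_space = gymnasium.spaces.Box(low, high, dtype=np.float32)
```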
@@ -814,7 +813,7 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False): mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages") base_env = OvercookedEnv.from_mdp(mdp, horizon=500) - env = gym.make("Overcooked-v0",base_env = base_env, featurize_fn =base_env.featurize_state_mdp) + env = gymnasium.make("Overcooked-v0",base_env = base_env, featurize_fn =base_env.featurize_state_mdp) """ if baselines_reproducible: # NOTE: @@ -830,7 +829,7 @@ def __init__(self, base_env, featurize_fn, baselines_reproducible=False): self.base_env = base_env self.featurize_fn = featurize_fn self.observation_space = self._setup_observation_space() - self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) + self.action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) self.reset() self.visualizer = StateVisualizer() @@ -838,9 +837,9 @@ def _setup_observation_space(self): dummy_mdp = self.base_env.mdp dummy_state = dummy_mdp.get_standard_start_state() obs_shape = self.featurize_fn(dummy_state)[0].shape - high = np.ones(obs_shape) * float("inf") - low = np.zeros(obs_shape) - return gym.spaces.Box(low, high, dtype=np.float32) + high = np.ones(obs_shape, dtype=np.float32) * float("inf") + low = np.zeros(obs_shape, dtype=np.float32) + return gymnasium.spaces.Box(low, high, dtype=np.float32) def step(self, action): """ diff --git a/testing/overcooked_test.py b/testing/overcooked_test.py index 2bd5b989..675e618b 100644 --- a/testing/overcooked_test.py +++ b/testing/overcooked_test.py @@ -6,7 +6,7 @@ import unittest from math import factorial -import gym +import gymnasium import numpy as np from overcooked_ai_py.agents.agent import ( @@ -1699,13 +1699,13 @@ def setUp(self): np.random.seed(0) def test_creation(self): - env = gym.make( + env = gymnasium.make( "Overcooked-v0", base_env=self.env, featurize_fn=self.env.featurize_state_mdp, ) # verify that the action_space * obs_space are initialized correctly - self.assertEqual(env.action_space, gym.spaces.Discrete(6)) + self.assertEqual(env.action_space, gymnasium.spaces.Discrete(6)) self.assertEqual( env.observation_space.shape, self.base_mdp.get_featurize_state_shape(), From 912c2d32fa2f4ef6b806a2dc6797780b6a096b16 Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 23:06:15 -0800 Subject: [PATCH 2/8] Discontinuing pettingzoo --- setup.py | 1 - src/overcooked_ai_py/mdp/overcooked_env.py | 222 ++++++++++----------- testing/overcooked_test.py | 25 --- 3 files changed, 111 insertions(+), 137 deletions(-) diff --git a/setup.py b/setup.py index 89c44b38..804bf4a4 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ "scipy", "tqdm", "gymnasium", - "pettingzoo", "ipython", "pygame", "ipywidgets", diff --git a/src/overcooked_ai_py/mdp/overcooked_env.py b/src/overcooked_ai_py/mdp/overcooked_env.py index a4fe2813..862fcbc1 100644 --- a/src/overcooked_ai_py/mdp/overcooked_env.py +++ b/src/overcooked_ai_py/mdp/overcooked_env.py @@ -666,117 +666,117 @@ def proportion_stuck_time(trajectories, agent_idx, stuck_time=3): return stuck_matrix -from pettingzoo.utils.env import ParallelEnv - -from overcooked_ai_py.agents.agent import AgentPair - - -class OvercookedEnvPettingZoo(ParallelEnv): - def __init__(self, base_env, agents): - """ - base_env: OvercookedEnv - agents: AgentPair - - Example creating a PettingZoo env from a base_env: - - mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages") - base_env = OvercookedEnv.from_mdp(mdp, horizon=500) - agent_pair = load_agent_pair("path/to/checkpoint", "ppo", "ppo") - 
env = OvercookedEnvPettingZoo(base_env, agent_pair) - - """ - # we need agent-dependent observation space, and the best way to do it is just to include an agentPair - assert isinstance( - agents, AgentPair - ), "agents must be an AgentPair object" - - self.agents = ["agent_0", "agent_1"] - self.possible_agents = ["agent_0", "agent_1"] - self.agent_map = {"agent_0": agents.a0, "agent_1": agents.a1} - self.base_env = base_env - self.observation_spaces = { - agent: self.observation_space(agent) for agent in self.agents - } - self.action_spaces = { - agent: gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) - for agent in self.agents - } - # this is the AgentPair object - self.reset() - - import functools - - # we want to return the same space object every time - @functools.lru_cache(maxsize=2) - def observation_space(self, agent): - # the observation can be different for each agent - agent = self.agent_map[agent] - dummy_mdp = self.base_env.mdp - dummy_state = dummy_mdp.get_standard_start_state() - obs_shape = agent.featurize(dummy_state)[0].shape - high = np.ones(obs_shape, dtype=np.float32) * float("inf") - low = np.zeros(obs_shape, dtype=np.float32) - return gymnasium.spaces.Box(low, high, dtype=np.float32) - - # we want to return the same space object every time - @functools.lru_cache(maxsize=1) - def action_space(self, agent): - # the action space is the same for each agent - return gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) - - def step(self, joint_action): - joint_action = [ - Action.ALL_ACTIONS[joint_action[agent]] for agent in joint_action - ] - obs, reward, done, info = self.base_env.step(joint_action) - # https://gymnasium.farama.org/content/basic_usage/ - # we have no early termination condition in this env, and the environment only terminates when the time horizon is reached - # therefore the terminated is always False, and we set truncated to done - terminated = False - truncated = done - - def create_dict(value): - """ - Each agent should have the same reward, terminated, truncated, info - """ - return {agent: value for agent in self.agents} - - def create_obs_dict(obs): - """ - Observation is potentially different for each agent - """ - return { - agent: self.agent_map[agent].featurize(obs) - for agent in self.agents - } - - obs = create_obs_dict(obs) - reward = create_dict(reward) - terminated = create_dict(terminated) - truncated = create_dict(truncated) - info = create_dict(info) - if done: - self.agents = [] - return obs, reward, terminated, truncated, info - - def reset(self, seed=None, options=None): - """ - Reset the embedded OvercookedEnv envrionment to the starting state - """ - self.base_env.reset() - dummy_mdp = self.base_env.mdp - dummy_state = dummy_mdp.get_standard_start_state() - # when an environment terminates/truncates, PettingZoo wants all agents removed, so during reset we re-add them - self.agents = self.possible_agents[:] - # return the obsevations as dict - obs_dict = { - agent: self.agent_map[agent].featurize(dummy_state)[0] - for agent in self.agents - } - return obs_dict, None - - def render(self, mode="human", close=False): - pass +# from pettingzoo.utils.env import ParallelEnv + +# from overcooked_ai_py.agents.agent import AgentPair + + +# class OvercookedEnvPettingZoo(ParallelEnv): +# def __init__(self, base_env, agents): +# """ +# base_env: OvercookedEnv +# agents: AgentPair + +# Example creating a PettingZoo env from a base_env: + +# mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages") +# base_env = 
OvercookedEnv.from_mdp(mdp, horizon=500) +# agent_pair = load_agent_pair("path/to/checkpoint", "ppo", "ppo") +# env = OvercookedEnvPettingZoo(base_env, agent_pair) + +# """ +# # we need agent-dependent observation space, and the best way to do it is just to include an agentPair +# assert isinstance( +# agents, AgentPair +# ), "agents must be an AgentPair object" + +# self.agents = ["agent_0", "agent_1"] +# self.possible_agents = ["agent_0", "agent_1"] +# self.agent_map = {"agent_0": agents.a0, "agent_1": agents.a1} +# self.base_env = base_env +# self.observation_spaces = { +# agent: self.observation_space(agent) for agent in self.agents +# } +# self.action_spaces = { +# agent: gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) +# for agent in self.agents +# } +# # this is the AgentPair object +# self.reset() + +# import functools + +# # we want to return the same space object every time +# @functools.lru_cache(maxsize=2) +# def observation_space(self, agent): +# # the observation can be different for each agent +# agent = self.agent_map[agent] +# dummy_mdp = self.base_env.mdp +# dummy_state = dummy_mdp.get_standard_start_state() +# obs_shape = agent.featurize(dummy_state)[0].shape +# high = np.ones(obs_shape, dtype=np.float32) * float("inf") +# low = np.zeros(obs_shape, dtype=np.float32) +# return gymnasium.spaces.Box(low, high, dtype=np.float32) + +# # we want to return the same space object every time +# @functools.lru_cache(maxsize=1) +# def action_space(self, agent): +# # the action space is the same for each agent +# return gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) + +# def step(self, joint_action): +# joint_action = [ +# Action.ALL_ACTIONS[joint_action[agent]] for agent in joint_action +# ] +# obs, reward, done, info = self.base_env.step(joint_action) +# # https://gymnasium.farama.org/content/basic_usage/ +# # we have no early termination condition in this env, and the environment only terminates when the time horizon is reached +# # therefore the terminated is always False, and we set truncated to done +# terminated = False +# truncated = done + +# def create_dict(value): +# """ +# Each agent should have the same reward, terminated, truncated, info +# """ +# return {agent: value for agent in self.agents} + +# def create_obs_dict(obs): +# """ +# Observation is potentially different for each agent +# """ +# return { +# agent: self.agent_map[agent].featurize(obs) +# for agent in self.agents +# } + +# obs = create_obs_dict(obs) +# reward = create_dict(reward) +# terminated = create_dict(terminated) +# truncated = create_dict(truncated) +# info = create_dict(info) +# if done: +# self.agents = [] +# return obs, reward, terminated, truncated, info + +# def reset(self, seed=None, options=None): +# """ +# Reset the embedded OvercookedEnv envrionment to the starting state +# """ +# self.base_env.reset() +# dummy_mdp = self.base_env.mdp +# dummy_state = dummy_mdp.get_standard_start_state() +# # when an environment terminates/truncates, PettingZoo wants all agents removed, so during reset we re-add them +# self.agents = self.possible_agents[:] +# # return the obsevations as dict +# obs_dict = { +# agent: self.agent_map[agent].featurize(dummy_state)[0] +# for agent in self.agents +# } +# return obs_dict, None + +# def render(self, mode="human", close=False): +# pass class Overcooked(gymnasium.Env): diff --git a/testing/overcooked_test.py b/testing/overcooked_test.py index 675e618b..5f17a026 100644 --- a/testing/overcooked_test.py +++ b/testing/overcooked_test.py @@ -1714,31 +1714,6 @@ 
def test_creation(self): # TODO: write more tests here -class TestPettingZooEnvironment(unittest.TestCase): - def test_api(self): - from pettingzoo.test import parallel_api_test - - # Check whether ray is installed and skip if not - try: - from human_aware_rl.rllib.rllib import load_agent_pair - except ModuleNotFoundError: - return - - base_mdp = OvercookedGridworld.from_layout_name("cramped_room") - # get the current directory of the file - current_dir = os.path.dirname(os.path.realpath(__file__)) - agent_dir = os.path.join( - current_dir, - "../src/overcooked_demo/server/static/assets/agents/RllibCrampedRoomSP/agent", - ) - ap = load_agent_pair(agent_dir, "ppo", "ppo") - env = OvercookedEnv.from_mdp(base_mdp, info_level=0, horizon=1000) - from overcooked_ai_py.mdp.overcooked_env import OvercookedEnvPettingZoo - - wrapped_env = OvercookedEnvPettingZoo(env, ap) - parallel_api_test(wrapped_env, num_cycles=1000) - - class TestTrajectories(unittest.TestCase): def setUp(self): self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room") From 581e2123abe3c357462856156f2c47e419eb6ef2 Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 23:10:51 -0800 Subject: [PATCH 3/8] Attempting to fix github actions --- .github/workflows/python-app.yml | 2 +- .github/workflows/pythontests.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2fc0b7ae..e3cc0c39 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -16,7 +16,7 @@ jobs: - name: Python setup #removed conda setup uses: actions/setup-python@v4 with: - python-version: 3.7 + python-version: 3.8 - name: Installing dependencies run: pip install -e .[harl] - name: Test human_aware_rl with unittest diff --git a/.github/workflows/pythontests.yml b/.github/workflows/pythontests.yml index 0ea8c9ac..da89bf53 100644 --- a/.github/workflows/pythontests.yml +++ b/.github/workflows/pythontests.yml @@ -21,10 +21,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python 3.7 + - name: Set up Python 3.8 uses: actions/setup-python@v4 with: - python-version: '3.7' + python-version: '3.8' architecture: 'x64' - name: Install dependencies run: | From 022d1276d9851c55a9b1008f32e855dc7bb3c8e7 Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 23:15:16 -0800 Subject: [PATCH 4/8] Trying again --- .github/workflows/pythonlint.yml | 4 ++-- .github/workflows/pythontests.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pythonlint.yml b/.github/workflows/pythonlint.yml index 5497aeeb..5449425b 100644 --- a/.github/workflows/pythonlint.yml +++ b/.github/workflows/pythonlint.yml @@ -8,10 +8,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python 3.7 + - name: Set up Python 3.8 uses: actions/setup-python@v4 with: - python-version: '3.7' + python-version: 3.8 architecture: 'x64' - name: Install dependencies run: | diff --git a/.github/workflows/pythontests.yml b/.github/workflows/pythontests.yml index da89bf53..08c6cde3 100644 --- a/.github/workflows/pythontests.yml +++ b/.github/workflows/pythontests.yml @@ -24,7 +24,7 @@ jobs: - name: Set up Python 3.8 uses: actions/setup-python@v4 with: - python-version: '3.8' + python-version: 3.8 architecture: 'x64' - name: Install dependencies run: | From 8ce9baa5f15c409f2a696076df6bc16789ff216e Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 
23:29:26 -0800 Subject: [PATCH 5/8] Trying to resolve a pydantic bug --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 804bf4a4..f3e77be8 100644 --- a/setup.py +++ b/setup.py @@ -43,6 +43,7 @@ "scipy", "tqdm", "gymnasium", + "pydantic<2.0", "ipython", "pygame", "ipywidgets", From 8b73158a913e0a5139dfd29feeabfd9e9a688133 Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 23:44:08 -0800 Subject: [PATCH 6/8] Discontinuing harl tests/support --- .github/workflows/python-app.yml | 26 -------------------------- README.md | 2 +- setup.py | 1 - 3 files changed, 1 insertion(+), 28 deletions(-) delete mode 100644 .github/workflows/python-app.yml diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml deleted file mode 100644 index e3cc0c39..00000000 --- a/.github/workflows/python-app.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Human Aware Rllib - -on: - push: - branches: "*" - pull_request: - branches: "*" - -jobs: - build_ubuntu: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - name: Python setup #removed conda setup - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Installing dependencies - run: pip install -e .[harl] - - name: Test human_aware_rl with unittest - run: | - cd src/human_aware_rl - sudo chmod 777 ./run_tests.sh - ./run_tests.sh diff --git a/README.md b/README.md index dd627e4c..c75b4861 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ $ ./run_tests.sh ⚠️**Be sure to change your CWD to the human_aware_rl directory before running the script, as the test script uses the CWD to dynamically generate a path to save temporary training runs/checkpoints. The testing script will fail if not being run from the correct directory.** -This will run all tests belonging to the human_aware_rl module. You can checkout the README in the submodule for instructions of running target-specific tests. This can be initiated from any directory. +This will run all tests belonging to the human_aware_rl module. _These tests don't work anymore out of the box, due to package version issues_: if you fix them, feel free to make a PR. You can checkout the README in the submodule for instructions of running target-specific tests. This can be initiated from any directory. 
If you're thinking of using the planning code extensively, you should run the full testing suite that verifies all of the Overcooked accessory tools (this can take 5-10 mins): ``` diff --git a/setup.py b/setup.py index f3e77be8..804bf4a4 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,6 @@ "scipy", "tqdm", "gymnasium", - "pydantic<2.0", "ipython", "pygame", "ipywidgets", From a2970405ba14b51c9757080300b1599c6554e933 Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 23:47:45 -0800 Subject: [PATCH 7/8] Trying to solve the black linter actions error --- src/human_aware_rl/rllib/rllib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/human_aware_rl/rllib/rllib.py b/src/human_aware_rl/rllib/rllib.py index f54fe345..639e245d 100644 --- a/src/human_aware_rl/rllib/rllib.py +++ b/src/human_aware_rl/rllib/rllib.py @@ -220,7 +220,9 @@ def _setup_action_space(self, agents): for agent in agents: action_sp[agent] = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) self.action_space = gymnasium.spaces.Dict(action_sp) - self.shared_action_space = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) + self.shared_action_space = gymnasium.spaces.Discrete( + len(Action.ALL_ACTIONS) + ) def _setup_observation_space(self, agents): dummy_state = self.base_env.mdp.get_standard_start_state() From 06090f2db2cc02c937bbbd87e8a9f55f862c8c2d Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Fri, 31 Jan 2025 23:49:38 -0800 Subject: [PATCH 8/8] Fixing linting --- src/human_aware_rl/rllib/rllib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/human_aware_rl/rllib/rllib.py b/src/human_aware_rl/rllib/rllib.py index 639e245d..3b14168f 100644 --- a/src/human_aware_rl/rllib/rllib.py +++ b/src/human_aware_rl/rllib/rllib.py @@ -218,7 +218,9 @@ def _validate_schedule(self, schedule): def _setup_action_space(self, agents): action_sp = {} for agent in agents: - action_sp[agent] = gymnasium.spaces.Discrete(len(Action.ALL_ACTIONS)) + action_sp[agent] = gymnasium.spaces.Discrete( + len(Action.ALL_ACTIONS) + ) self.action_space = gymnasium.spaces.Dict(action_sp) self.shared_action_space = gymnasium.spaces.Discrete( len(Action.ALL_ACTIONS)
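End-to-end, the series leaves `Overcooked-v0` registered through gymnasium, so the single-agent wrapper is constructed with `gymnasium.make`. Below is a minimal construction sketch consistent with the `Overcooked` docstring and the `test_creation` test above — a sketch only: the module paths for `OvercookedGridworld`/`OvercookedEnv` follow the repo's `mdp` package, and the reset/step return signatures are whatever the wrapper defines; neither is shown in these patches.

```python
import gymnasium
from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv

# Build the underlying multi-agent env, then wrap it as a single-agent gymnasium env
mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages")
base_env = OvercookedEnv.from_mdp(mdp, horizon=500)
env = gymnasium.make(
    "Overcooked-v0",
    base_env=base_env,
    featurize_fn=base_env.featurize_state_mdp,
)

# Mirrors the assertions in test_creation: a Discrete(6) action space and a
# Box observation space shaped like the featurized state
assert env.action_space == gymnasium.spaces.Discrete(6)
print(env.observation_space.shape)
```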