From 3ade179aaa8f3054ae18975bd5112a354fcbfbc6 Mon Sep 17 00:00:00 2001
From: arnupretorius <arnupretorius@gmail.com>
Date: Mon, 4 Sep 2023 16:29:37 +0200
Subject: [PATCH] docs: add references to games and fix docstrings

---
 matrax/env.py               |  2 +-
 matrax/env_test.py          | 16 +---------------
 matrax/games/climbing.py    |  6 ++++++
 matrax/games/conflict.py    |  8 ++++++++
 matrax/games/no_conflict.py |  8 ++++++++
 matrax/games/penalty.py     |  6 ++++++
 6 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/matrax/env.py b/matrax/env.py
index e14b649..2852794 100644
--- a/matrax/env.py
+++ b/matrax/env.py
@@ -26,7 +26,7 @@
 
 
 class MatrixGame(Environment[State]):
-    """JAX implementation of a 2-player matrix game environment:
+    """JAX implementation of the 2-player matrix game environment:
     https://github.com/uoe-agents/matrix-games
 
     A matrix game is a two-player game where each player has a set of actions and a payoff matrix.
diff --git a/matrax/env_test.py b/matrax/env_test.py
index 2affbad..da09118 100644
--- a/matrax/env_test.py
+++ b/matrax/env_test.py
@@ -12,20 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# # Copyright 2022 InstaDeep Ltd. All rights reserved.
-# #
-# # Licensed under the Apache License, Version 2.0 (the "License");
-# # you may not use this file except in compliance with the License.
-# # You may obtain a copy of the License at
-# #
-# #     http://www.apache.org/licenses/LICENSE-2.0
-# #
-# # Unless required by applicable law or agreed to in writing, software
-# # distributed under the License is distributed on an "AS IS" BASIS,
-# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# # See the License for the specific language governing permissions and
-# # limitations under the License.
-
 import chex
 import jax
 import jax.numpy as jnp
@@ -165,7 +151,7 @@ def test_matrix_game__time_limit(matrix_game_env: MatrixGame) -> None:
 
 
 def test_matrix_game__reward(matrix_game_env: MatrixGame) -> None:
-    """Validate the termination after time limit has been reached."""
+    """Validate that the rewards are correct for the given agent actions."""
     step_fn = jax.jit(matrix_game_env.step)
     state_key = random.PRNGKey(10)
     state, timestep = matrix_game_env.reset(state_key)
diff --git a/matrax/games/climbing.py b/matrax/games/climbing.py
index e92318d..4a3143d 100644
--- a/matrax/games/climbing.py
+++ b/matrax/games/climbing.py
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Game reference:
+# -----------------
+# Claus C, Boutilier C. The dynamics of reinforcement learning in
+# cooperative multiagent systems. AAAI/IAAI. 1998.
+# https://www.cs.toronto.edu/~cebly/Papers/_download_/multirl.pdf
+
 import jax.numpy as jnp
 
 climbing_game = jnp.array(
diff --git a/matrax/games/conflict.py b/matrax/games/conflict.py
index 8f3ff50..c838277 100644
--- a/matrax/games/conflict.py
+++ b/matrax/games/conflict.py
@@ -12,6 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Game references:
+# ----------------
+# [1] Rapoport, A., and M. Guyer. 1966. “A Taxonomy of 2 × 2 Games.” General Systems:
+# Yearbook of the Society for General Systems Research 11:203–214.
+# [2] Albrecht SV, Ramamoorthy S. Comparative Evaluation of Multiagent Learning Algorithms
+# in a Diverse Set of Ad Hoc Team Problems. arXiv preprint arXiv:1907.09189. 2019 Jul 22.
+# https://arxiv.org/pdf/1907.09189.pdf
+
 from matrax.games.utils import convert_payoff_vector_to_matrix
 
 # 1 (7)
diff --git a/matrax/games/no_conflict.py b/matrax/games/no_conflict.py
index 0a5bc73..a7bd3c5 100644
--- a/matrax/games/no_conflict.py
+++ b/matrax/games/no_conflict.py
@@ -12,6 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Game references:
+# ----------------
+# [1] Rapoport, A., and M. Guyer. 1966. “A Taxonomy of 2 × 2 Games.” General Systems:
+# Yearbook of the Society for General Systems Research 11:203–214.
+# [2] Albrecht SV, Ramamoorthy S. Comparative Evaluation of Multiagent Learning Algorithms
+# in a Diverse Set of Ad Hoc Team Problems. arXiv preprint arXiv:1907.09189. 2019 Jul 22.
+# https://arxiv.org/pdf/1907.09189.pdf
+
 from matrax.games.utils import convert_payoff_vector_to_matrix
 
 # 1 (1)
diff --git a/matrax/games/penalty.py b/matrax/games/penalty.py
index 74ff32b..f3bfe76 100644
--- a/matrax/games/penalty.py
+++ b/matrax/games/penalty.py
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Game reference:
+# -----------------
+# Claus C, Boutilier C. The dynamics of reinforcement learning in
+# cooperative multiagent systems. AAAI/IAAI. 1998.
+# https://www.cs.toronto.edu/~cebly/Papers/_download_/multirl.pdf
+
 import jax.numpy as jnp
 
 penalty_games = {}