From 3ade179aaa8f3054ae18975bd5112a354fcbfbc6 Mon Sep 17 00:00:00 2001 From: arnupretorius Date: Mon, 4 Sep 2023 16:29:37 +0200 Subject: [PATCH] docs: add references to games and fix docstrings --- matrax/env.py | 2 +- matrax/env_test.py | 16 +--------------- matrax/games/climbing.py | 6 ++++++ matrax/games/conflict.py | 8 ++++++++ matrax/games/no_conflict.py | 8 ++++++++ matrax/games/penalty.py | 6 ++++++ 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/matrax/env.py b/matrax/env.py index e14b649..2852794 100644 --- a/matrax/env.py +++ b/matrax/env.py @@ -26,7 +26,7 @@ class MatrixGame(Environment[State]): - """JAX implementation of a 2-player matrix game environment: + """JAX implementation of the 2-player matrix game environment: https://github.com/uoe-agents/matrix-games A matrix game is a two-player game where each player has a set of actions and a payoff matrix. diff --git a/matrax/env_test.py b/matrax/env_test.py index 2affbad..da09118 100644 --- a/matrax/env_test.py +++ b/matrax/env_test.py @@ -12,20 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# # Copyright 2022 InstaDeep Ltd. All rights reserved. -# # -# # Licensed under the Apache License, Version 2.0 (the "License"); -# # you may not use this file except in compliance with the License. -# # You may obtain a copy of the License at -# # -# # http://www.apache.org/licenses/LICENSE-2.0 -# # -# # Unless required by applicable law or agreed to in writing, software -# # distributed under the License is distributed on an "AS IS" BASIS, -# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# # See the License for the specific language governing permissions and -# # limitations under the License. - import chex import jax import jax.numpy as jnp @@ -165,7 +151,7 @@ def test_matrix_game__time_limit(matrix_game_env: MatrixGame) -> None: def test_matrix_game__reward(matrix_game_env: MatrixGame) -> None: - """Validate the termination after time limit has been reached.""" + """Validate the rewards are correct based on agent actions.""" step_fn = jax.jit(matrix_game_env.step) state_key = random.PRNGKey(10) state, timestep = matrix_game_env.reset(state_key) diff --git a/matrax/games/climbing.py b/matrax/games/climbing.py index e92318d..4a3143d 100644 --- a/matrax/games/climbing.py +++ b/matrax/games/climbing.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Game reference: +# ----------------- +# Claus C, Boutilier C. The dynamics of reinforcement learning in +# cooperative multiagent systems. AAAI/IAAI. 1998. +# https://www.cs.toronto.edu/~cebly/Papers/_download_/multirl.pdf + import jax.numpy as jnp climbing_game = jnp.array( diff --git a/matrax/games/conflict.py b/matrax/games/conflict.py index 8f3ff50..c838277 100644 --- a/matrax/games/conflict.py +++ b/matrax/games/conflict.py @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Game reference: +# ----------------- +# [1] Rapoport, A., and M. Guyer. 1966. “A Taxonomy of 2 × 2 Games.” General Systems: +# Yearbook of the Society for General Systems Research 11:203–214. +# [2] Albrecht SV, Ramamoorthy S. Comparative Evaluation of Multiagent Learning Algorithms +# in a Diverse Set of Ad Hoc Team Problems. arXiv preprint arXiv:1907.09189. 2019 Jul 22. +# https://arxiv.org/pdf/1907.09189.pdf + from matrax.games.utils import convert_payoff_vector_to_matrix # 1 (7) diff --git a/matrax/games/no_conflict.py b/matrax/games/no_conflict.py index 0a5bc73..a7bd3c5 100644 --- a/matrax/games/no_conflict.py +++ b/matrax/games/no_conflict.py @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Game reference: +# ----------------- +# [1] Rapoport, A., and M. Guyer. 1966. “A Taxonomy of 2 × 2 Games.” General Systems: +# Yearbook of the Society for General Systems Research 11:203–214. +# [2] Albrecht SV, Ramamoorthy S. Comparative Evaluation of Multiagent Learning Algorithms +# in a Diverse Set of Ad Hoc Team Problems. arXiv preprint arXiv:1907.09189. 2019 Jul 22. +# https://arxiv.org/pdf/1907.09189.pdf + from matrax.games.utils import convert_payoff_vector_to_matrix # 1 (1) diff --git a/matrax/games/penalty.py b/matrax/games/penalty.py index 74ff32b..f3bfe76 100644 --- a/matrax/games/penalty.py +++ b/matrax/games/penalty.py @@ -12,6 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Game reference: +# ----------------- +# Claus C, Boutilier C. The dynamics of reinforcement learning in +# cooperative multiagent systems. AAAI/IAAI. 1998. +# https://www.cs.toronto.edu/~cebly/Papers/_download_/multirl.pdf + import jax.numpy as jnp penalty_games = {}