minmaxed a bit

ExalFabu · Jan 7, 2024 · 8258b4f · 8258b4f
1 parent 8d1ce8e
commit 8258b4f
Show file tree

Hide file tree

Showing 5 changed files with 141 additions and 49 deletions.
diff --git a/ci_quixo/custom_game.py b/ci_quixo/custom_game.py
@@ -1,6 +1,6 @@
 import numpy as np
 from game import Game, Move
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 from copy import deepcopy
 
 Position = namedtuple("Position", ["x", "y"], defaults=[0, 0])
@@ -110,9 +110,45 @@ def __hash__(self) -> str:
     def __eq__(self, other: "CustomGame") -> bool:
         return self.__hash__() == other.__hash__()
 
-    def valid_moves(self, player: int) -> tuple[CompleteMove]:
-        return [it for it in POSSIBLE_MOVES if self._board[it.position[::-1]] == -1 or self._board[it.position[::-1]] == player]
-
+    def valid_moves(self, player: int, filter_duplicates: bool = True) -> tuple[CompleteMove, ...]:
+        """Return the moves open to `player`; with `filter_duplicates`, keep one move per distinct resulting board."""
+        # A move is open when the cell it takes holds -1 or `player`'s own index.
+        valids = tuple(it for it in POSSIBLE_MOVES if self._board[it.position[::-1]] in (-1, player))
+        if not filter_duplicates:
+            return valids  # a tuple here too, so both paths agree with the annotation
+        s = defaultdict(list)  # str() of the board after the move -> moves that produce it
+        for valid in valids:
+            copy = deepcopy(self)
+            copy._Game__move(*valid, player)
+            s[str(copy)].append(valid)
+        return tuple(moves[0] for moves in s.values())
+
+    def score(self) -> int:
+        """Heuristic value of the board, relative to `current_player_idx`.
+
+        A decided game is worth +5**5 or -5**5. Otherwise each player is rated 5**k, with k
+        the largest number of their marks on any single row, column or diagonal, and the
+        value is the current player's rating minus the other player's.
+        """
+        winner = self.check_winner()
+        if winner != -1:
+            # Parenthesised on purpose: `a * 1 if c else -1` parses as `(a * 1) if c else -1`,
+            # which rated a loss -1, i.e. above any undecided position in which the mover trails.
+            return (5**5) * (1 if winner == self.current_player_idx else -1)
+
+        # Every row, every column and both diagonals.
+        lines = [*self._board, *self._board.transpose()]
+        lines += [self._board.diagonal(), self._board[:, ::-1].diagonal()]
+        x_score = [sum(line == 0) for line in lines]
+        o_score = [sum(line == 1) for line in lines]
+
+        # Only each player's best line counts, weighted exponentially in its length.
+        score_x, score_o = 5**max(x_score), 5**max(o_score)
+        score = score_x - score_o
+
+        # score_x - score_o is player 0's view; negate it when player 1 is the current player.
+        score *= 1 if self.current_player_idx == 0 else -1
+        return score
 
 if __name__ == "__main__":
     from random import choice

diff --git a/ci_quixo/helper.py b/ci_quixo/helper.py
@@ -1,8 +1,10 @@
 import numpy as np
 from typing import TYPE_CHECKING
-
+from main import RandomPlayer, Game
+from tqdm.auto import trange
 if TYPE_CHECKING:
     from game import Game
+    from main import Player
 
 def pprint_board(game: "Game"):
     board: np.ndarray = game.get_board()
@@ -15,3 +17,22 @@ def pprint_board(game: "Game"):
             print(c, end="")
         print()
 
+def evaluate(p1: "Player", p2: "Player" = None, games: int = 10, display: bool = False) -> tuple[float, float, float]:
+    """Play `games` games of p1 against p2 (a RandomPlayer if None), swapping seats each game; return p1's win rates (total, as 1st, as 2nd)."""
+    p2 = RandomPlayer() if p2 is None else p2
+    won_as_first, won_as_second = 0, 0
+    for i in trange(games, desc="Evaluating player", unit="game"):
+        game = Game()
+        if i % 2 == 0:
+            won_as_first += 1 if game.play(p1, p2) == 0 else 0
+        else:
+            won_as_second += 1 if game.play(p2, p1) == 1 else 0
+    # p1 is passed first on even iterations: ceil(games/2) games in seat 1, floor(games/2) in seat 2.
+    wins = (won_as_first + won_as_second) / max(games, 1)  # max(..., 1): no ZeroDivisionError when nothing was played
+    won_as_first /= max((games + 1) // 2, 1)
+    won_as_second /= max(games // 2, 1)
+    if display:
+        print(f"Total wins : {wins:.2%}")
+        print(f"Wins as 1st: {won_as_first:.2%}")
+        print(f"Wins as 2nd: {won_as_second:.2%}")
+    return wins, won_as_first, won_as_second
diff --git a/ci_quixo/minmax.py b/ci_quixo/minmax.py
@@ -10,63 +10,77 @@
 
 class MinMaxPlayer(Player):
 
-    def __init__(self, max_depth: int = None, use_alpha_beta_pruning: bool = False) -> None:
+    def __init__(self, max_depth: int = None, use_alpha_beta_pruning: bool = False, verbose: bool = False) -> None:
         super().__init__()
 
         self.max_depth = max_depth
         self.use_alpha_beta_pruning = use_alpha_beta_pruning
-        self._init_ab()
-
-    def _init_ab(self):
-        self._alpha, self._beta = -np.inf, np.inf
-
+        self.verbose = verbose  # when truthy, make_move/_minmax print the board and the chosen move
 
     def make_move(self, game: Game) -> "CompleteMove":
+        self.verbose and print("Deciding move on the following board")  # `x and f()` calls f() only when verbose is truthy
         cg = CustomGame.from_game(game)
-        best_move = self._minmax(0, cg, True)[1]
+        self.verbose and cg.pprint()
+        best_move = self._minmax(cg)  # None when no candidate move beat the initial -inf
         if best_move is None:
-            best_move = random.choice(cg.valid_moves())
-        print("I made a move...")
+            best_move = random.choice(cg.valid_moves(cg.get_current_player()))  # fall back to a random valid move
+        cg._Game__move(*best_move, cg.current_player_idx)  # played on cg so the verbose print below shows the result
+        self.verbose and print(f"Played {best_move=}")
+        self.verbose and cg.pprint()
         return best_move
 
-    def _minmax(self, depth: int, game: "CustomGame", maximixe: bool) -> tuple[float, "CompleteMove"]:
-        winner = game.check_winner()
-        if winner != -1:
-            return 25 * winner, None
-        if self.max_depth is not None and depth >= self.max_depth:
-            return 0, None
+    def _minmax(self, game: "CustomGame") -> "CompleteMove":
+        """Search the tree below `game` and return the best move for the player to move (None if no move scored above -inf)."""
 
-        if depth == 0:
-            self._init_ab()        
+        def min_side(self: "MinMaxPlayer", game: "CustomGame", alpha: int, beta: int, depth: int) -> int:
+            """Opponent's ply: lowest value reachable from `game`, from the root player's point of view."""
+            if game.check_winner() != -1 or (self.max_depth is not None and depth >= self.max_depth):
+                return -game.score()  # score() is relative to the side to move, which is the opponent on this ply
+            min_found = np.inf
 
-        best_move = None
-        if maximixe:
-            for move in game.valid_moves(game.get_current_player()):
-                copied = deepcopy(game)
-                assert copied._Game__move(*move, copied.current_player_idx), f"Somehow got an invalid move while iterating from valid moves, {copied}, {move}"
-                score, _ = self._minmax(depth+1, copied, False)
-                if score > self._alpha:
-                    self._alpha = score
-                    best_move = move
-
-                if self.use_alpha_beta_pruning and self._alpha > self._beta:
+            for move in game.valid_moves(game.current_player_idx):
+                copy = deepcopy(game)
+                moved = copy._Game__move(*move, copy.current_player_idx)  # kept out of the assert so -O cannot skip the move
+                assert moved, "Somehow move was invalid?????"
+                copy.current_player_idx = 1-copy.current_player_idx
+                min_found = min(min_found, max_side(self, copy, alpha, beta, depth+1))  # recurse on the child board, not `game`
+                beta = min(beta, min_found)
+                if alpha > beta and self.use_alpha_beta_pruning:
                     break
-            return self._alpha, best_move
-        else:
-            for move in game.valid_moves(game.get_current_player()):
-                copied = deepcopy(game)
-                assert copied._Game__move(*move, copied.current_player_idx), "Somehow got an invalid move while iterating from valid moves"
-                score, _ = self._minmax(depth+1, copied, True)
-                if score < self._beta:
-                    self._beta = score
-                    best_move = move
+            return min_found
+
+        def max_side(self: "MinMaxPlayer", game: "CustomGame", alpha: int, beta: int, depth: int) -> int:
+            """Root player's ply: highest value reachable from `game`, from the root player's point of view."""
+            if game.check_winner() != -1 or (self.max_depth is not None and depth >= self.max_depth):
+                return game.score()
 
-                if self.use_alpha_beta_pruning and self._alpha > self._beta:
+            max_found = -np.inf
+            for move in game.valid_moves(game.current_player_idx):
+                copy = deepcopy(game)
+                moved = copy._Game__move(*move, copy.current_player_idx)
+                assert moved, "Somehow move was invalid?????"
+                copy.current_player_idx = 1-copy.current_player_idx
+                max_found = max(max_found, min_side(self, copy, alpha, beta, depth+1))
+                alpha = max(alpha, max_found)
+                if alpha > beta and self.use_alpha_beta_pruning:
                     break
-            return self._beta, best_move
+            return max_found
+
+        best_move = None
+        alpha, beta = -np.inf, np.inf
+        for move in game.valid_moves(game.current_player_idx):
+            copy = deepcopy(game)
+            moved = copy._Game__move(*move, copy.current_player_idx)
+            assert moved, "Somehow move was invalid?????"
+            copy.current_player_idx = 1-copy.current_player_idx
+            min_score = min_side(self, copy, alpha, beta, 1)  # the opponent replies on the board after `move`
+            if min_score > alpha:
+                alpha = min_score
+                best_move = move
+        self.verbose and print(f"Found best move with score {alpha}")
+        return best_move
+
 
 if __name__ == "__main__":
-    from main import RandomPlayer
-    mm = MinMaxPlayer(20, True)
-    rp = RandomPlayer()
-    game = Game()
+    from helper import evaluate
+    evaluate(MinMaxPlayer(3, True), None, 10, True)  # max_depth=3 with alpha-beta pruning, p2=None, 10 games, display=True
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,6 +9,7 @@ readme = "README.md"
 python = "^3.12"
 numpy = "^1.26.3"
 pytest = "^7.4.4"
+tqdm = "^4.66.1"
 
 
 [build-system]