diff --git a/ci_quixo/custom_game.py b/ci_quixo/custom_game.py
index 9d3bd71..2f4dc52 100644
--- a/ci_quixo/custom_game.py
+++ b/ci_quixo/custom_game.py
@@ -1,6 +1,6 @@
 import numpy as np
 from game import Game, Move
-from collections import namedtuple
+from collections import namedtuple, defaultdict
 from copy import deepcopy
 
 Position = namedtuple("Position", ["x", "y"], defaults=[0, 0])
@@ -110,9 +110,64 @@ def __hash__(self) -> str:
     def __eq__(self, other: "CustomGame") -> bool:
         return self.__hash__() == other.__hash__()
 
-    def valid_moves(self, player: int) -> tuple[CompleteMove]:
-        return [it for it in POSSIBLE_MOVES if self._board[it.position[::-1]] == -1 or self._board[it.position[::-1]] == player]
-
+    def valid_moves(self, player: int, filter_duplicates: bool = True) -> tuple[CompleteMove, ...]:
+        """Return the moves `player` may play on the current board.
+
+        A move is kept when its target cell holds -1 or `player`. With
+        `filter_duplicates` set, moves producing the same resulting board
+        (compared through `str()` of the game) are collapsed and only the
+        first move reaching each board is returned.
+        """
+        valids = tuple(
+            it for it in POSSIBLE_MOVES
+            if self._board[it.position[::-1]] in (-1, player)
+        )
+        if not filter_duplicates:
+            # Same container type as the filtered branch (a list used to leak out here).
+            return valids
+        moves_by_board = defaultdict(list)
+        for valid in valids:
+            board_after = deepcopy(self)
+            board_after._Game__move(*valid, player)
+            moves_by_board[str(board_after)].append(valid)
+        # Dicts keep insertion order, so the survivors stay in POSSIBLE_MOVES order.
+        return tuple(moves[0] for moves in moves_by_board.values())
+
+    def score(self) -> int:
+        """Heuristic value of the board for the player whose turn it is.
+
+        A decided game is worth `5**5` if the current player won and `-(5**5)`
+        otherwise. An undecided game gives each side 5 raised to the largest
+        count of its cells (0 for x, 1 for o) found on one row, column or
+        diagonal, and returns the current player's value minus the opponent's.
+        """
+        winner = self.check_winner()
+        if winner != -1:
+            # The parentheses matter: `a * 1 if cond else -1` parses as
+            # `(a * 1) if cond else -1`, which scored every loss as just -1.
+            return (5**5) * (1 if winner == self.current_player_idx else -1)
+        transposed = self._board.transpose()
+
+        x_score = []
+        o_score = []
+        for row, column in zip(self._board, transposed):
+            x_score.append(sum(row == 0))
+            x_score.append(sum(column == 0))
+            o_score.append(sum(row == 1))
+            o_score.append(sum(column == 1))
+
+        diag = self._board.diagonal()
+        second_diag = self._board[:, ::-1].diagonal()
+
+        x_score.append(sum(diag == 0))
+        o_score.append(sum(diag == 1))
+        x_score.append(sum(second_diag == 0))
+        o_score.append(sum(second_diag == 1))
+
+        score_x, score_o = 5**max(x_score), 5**max(o_score)
+        score = score_x - score_o
+        score *= 1 if self.current_player_idx == 0 else -1
+        return score
 
 if __name__ == "__main__":
     from random import choice
diff --git a/ci_quixo/helper.py b/ci_quixo/helper.py
index ff6161b..1ebca50 100644
--- a/ci_quixo/helper.py
+++ b/ci_quixo/helper.py
@@ -1,8 +1,10 @@
 import numpy as np
 from typing import TYPE_CHECKING
-
+from main import RandomPlayer, Game
+from tqdm.auto import trange
 if TYPE_CHECKING:
     from game import Game
+    from main import Player
 
 def pprint_board(game: "Game"):
     board: np.ndarray = game.get_board()
@@ -15,3 +17,35 @@ def pprint_board(game: "Game"):
             print(c, end="")
         print()
 
+def evaluate(p1: "Player", p2: "Player" = None, games: int = 10, display: bool = False) -> tuple[float, float, float]:
+    """Play `games` games between `p1` and `p2` and report `p1`'s win rates.
+
+    `p1` moves first in even-numbered games (0, 2, ...) and second in the
+    others; a `RandomPlayer` is the opponent when `p2` is omitted.
+
+    Returns `(wins, won_as_first, won_as_second)` as fractions: overall, over
+    the games played first and over the games played second. A fraction is
+    0.0 when no game was played in that role. `display` also prints them.
+    """
+    if p2 is None:
+        p2 = RandomPlayer()
+    won_as_first, won_as_second = 0, 0
+    for i in trange(games, desc="Evaluating player", unit="game"):
+        game = Game()
+        if i % 2 == 0:
+            won_as_first += 1 if game.play(p1, p2) == 0 else 0
+        else:
+            won_as_second += 1 if game.play(p2, p1) == 1 else 0
+    # Even indices are played first, so an odd `games` has one more game as
+    # first than as second; dividing both by `games / 2` skewed the rates
+    # and raised ZeroDivisionError for `games=0`.
+    games_as_first = (games + 1) // 2
+    games_as_second = games // 2
+    wins = (won_as_first + won_as_second) / games if games > 0 else 0.0
+    won_as_first = won_as_first / games_as_first if games_as_first > 0 else 0.0
+    won_as_second = won_as_second / games_as_second if games_as_second > 0 else 0.0
+    if display:
+        print(f"Total wins : {wins:.2%}")
+        print(f"Wins as 1st: {won_as_first:.2%}")
+        print(f"Wins as 2nd: {won_as_second:.2%}")
+    return wins, won_as_first, won_as_second
\ No newline at end of file
diff --git a/ci_quixo/minmax.py b/ci_quixo/minmax.py
index 032d298..5d9a190 100644
--- a/ci_quixo/minmax.py
+++ b/ci_quixo/minmax.py
@@ -10,63 +10,105 @@
 
 
 class MinMaxPlayer(Player):
-    def __init__(self, max_depth: int = None, use_alpha_beta_pruning: bool = False) -> None:
+    def __init__(self, max_depth: int = None, use_alpha_beta_pruning: bool = False, verbose: bool = False) -> None:
+        """Configure the search.
+
+        `max_depth` bounds the number of plies explored (None leaves the
+        search unbounded), `use_alpha_beta_pruning` enables branch cut-offs
+        and `verbose` prints the board and the chosen move on every turn.
+        """
         super().__init__()
         self.max_depth = max_depth
         self.use_alpha_beta_pruning = use_alpha_beta_pruning
-        self._init_ab()
-
-    def _init_ab(self):
-        self._alpha, self._beta = -np.inf, np.inf
-
+        self.verbose = verbose
+
     def make_move(self, game: Game) -> "CompleteMove":
+        """Pick the move to play on `game`, falling back to a random valid one."""
+        if self.verbose:
+            print("Deciding move on the following board")
         cg = CustomGame.from_game(game)
-        best_move = self._minmax(0, cg, True)[1]
+        if self.verbose:
+            cg.pprint()
+        best_move = self._minmax(cg)
         if best_move is None:
-            best_move = random.choice(cg.valid_moves())
-        print("I made a move...")
+            best_move = random.choice(cg.valid_moves(cg.get_current_player()))
+        if self.verbose:
+            # The move is replayed on `cg` only to show the resulting board.
+            cg._Game__move(*best_move, cg.current_player_idx)
+            print(f"Played {best_move=}")
+            cg.pprint()
         return best_move
 
 
-    def _minmax(self, depth: int, game: "CustomGame", maximixe: bool) -> tuple[float, "CompleteMove"]:
-        winner = game.check_winner()
-        if winner != -1:
-            return 25 * winner, None
-        if self.max_depth is not None and depth >= self.max_depth:
-            return 0, None
+    def _minmax(self, game: "CustomGame") -> "CompleteMove":
+        """Return the best move for the player to move in `game`, or None.
+
+        Plain minimax over `valid_moves`, `max_side` standing for the player
+        to move in `game` and `min_side` for the opponent. All values are
+        expressed from the former's point of view. None is returned when no
+        move scores above -inf (for instance when there is no valid move).
+        """
+        root_player = game.current_player_idx
 
-        if depth == 0:
-            self._init_ab()
+        def play(node: "CustomGame", move: "CompleteMove") -> "CustomGame":
+            # Copy of `node` once its current player has played `move`; the turn
+            # is handed over by hand. The move runs outside the `assert` so it
+            # still happens when assertions are stripped (`python -O`).
+            child = deepcopy(node)
+            moved = child._Game__move(*move, child.current_player_idx)
+            assert moved, "Somehow move was invalid?????"
+            child.current_player_idx = 1 - child.current_player_idx
+            return child
+
+        def leaf_value(node: "CustomGame") -> int:
+            # `score()` is relative to the node's current player: negate it on
+            # the opponent's turn so that every level compares like with like.
+            value = node.score()
+            return value if node.current_player_idx == root_player else -value
+
+        def min_side(game: "CustomGame", alpha: float, beta: float, depth: int) -> float:
+            winner = game.check_winner()
+            if (self.max_depth is not None and depth >= self.max_depth) or winner != -1:
+                return leaf_value(game)
+
+            min_found = np.inf
 
-        best_move = None
-        if maximixe:
-            for move in game.valid_moves(game.get_current_player()):
-                copied = deepcopy(game)
-                assert copied._Game__move(*move, copied.current_player_idx), f"Somehow got an invalid move while iterating from valid moves, {copied}, {move}"
-                score, _ = self._minmax(depth+1, copied, False)
-                if score > self._alpha:
-                    self._alpha = score
-                    best_move = move
-
-                if self.use_alpha_beta_pruning and self._alpha > self._beta:
+            for move in game.valid_moves(game.current_player_idx):
+                # Search the position reached by `move`, not `game` itself.
+                min_found = min(min_found, max_side(play(game, move), alpha, beta, depth + 1))
+                beta = min(beta, min_found)
+                if self.use_alpha_beta_pruning and alpha >= beta:
                     break
-            return self._alpha, best_move
-        else:
-            for move in game.valid_moves(game.get_current_player()):
-                copied = deepcopy(game)
-                assert copied._Game__move(*move, copied.current_player_idx), "Somehow got an invalid move while iterating from valid moves"
-                score, _ = self._minmax(depth+1, copied, True)
-                if score < self._beta:
-                    self._beta = score
-                    best_move = move
+            return min_found
+
+        def max_side(game: "CustomGame", alpha: float, beta: float, depth: int) -> float:
+            winner = game.check_winner()
+            if (self.max_depth is not None and depth >= self.max_depth) or winner != -1:
+                return leaf_value(game)
 
-                if self.use_alpha_beta_pruning and self._alpha > self._beta:
+            max_found = -np.inf
+
+            for move in game.valid_moves(game.current_player_idx):
+                max_found = max(max_found, min_side(play(game, move), alpha, beta, depth + 1))
+                alpha = max(alpha, max_found)
+                if self.use_alpha_beta_pruning and alpha >= beta:
                     break
-            return self._beta, best_move
+            return max_found
+
+        best_move = None
+        alpha, beta = -np.inf, np.inf
+
+        for move in game.valid_moves(game.current_player_idx):
+            min_score = min_side(play(game, move), alpha, beta, 1)
+            if min_score > alpha:
+                alpha = min_score
+                best_move = move
+        if self.verbose:
+            print(f"Found best move with score {alpha}")
+        return best_move
+
 
 
 if __name__ == "__main__":
-    from main import RandomPlayer
-    mm = MinMaxPlayer(20, True)
-    rp = RandomPlayer()
-    game = Game()
\ No newline at end of file
+    from helper import evaluate
+    evaluate(MinMaxPlayer(3, True), None, 10, True)
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index cac6624..9937c78 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -113,7 +113,27 @@ pluggy = ">=0.12,<2.0"
 [package.extras]
 testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
 
+[[package]]
+name = "tqdm"
+version = "4.66.1"
+description = "Fast, Extensible Progress Meter"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"},
+    {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[package.extras]
+dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
+notebook = ["ipywidgets (>=6)"]
+slack = ["slack-sdk"]
+telegram = ["requests"]
+
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "9358020b2cb83e76f52968bbdb006ba21dd0835121b3f77282e8782b71ff545c"
+content-hash = "8d383a5ccf2272dfdc8f57d7177fcca13aaf9b7e6042e1534f09cb05fbbfb8e5"
diff --git a/pyproject.toml b/pyproject.toml
index cbdb6ee..b32004f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,6 +9,7 @@ readme = "README.md"
 python = "^3.12"
 numpy = "^1.26.3"
 pytest = "^7.4.4"
+tqdm = "^4.66.1"
 
 [build-system]