Skip to content

Commit

Permalink
minmaxed a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
Krasto committed Jan 7, 2024
1 parent 8d1ce8e commit 8258b4f
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 49 deletions.
44 changes: 40 additions & 4 deletions ci_quixo/custom_game.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
from game import Game, Move
from collections import namedtuple
from collections import namedtuple, defaultdict
from copy import deepcopy

Position = namedtuple("Position", ["x", "y"], defaults=[0, 0])
Expand Down Expand Up @@ -110,9 +110,45 @@ def __hash__(self) -> str:
def __eq__(self, other: "CustomGame") -> bool:
    """Report whether two games are equal by comparing their ``__hash__()`` values.

    The dunder is invoked directly (not through ``hash()``) on both operands,
    ``self`` first, and the two results are compared with ``==``.
    """
    own_key = self.__hash__()
    other_key = other.__hash__()
    return own_key == other_key

def valid_moves(self, player: int, filter_duplicates: bool = True) -> tuple[CompleteMove, ...]:
    """Return the moves ``player`` is allowed to play on the current board.

    A move from ``POSSIBLE_MOVES`` is a candidate when the cell it picks up is
    either neutral (``-1``) or already owned by ``player``.

    Args:
        player: index of the player the moves are generated for.
        filter_duplicates: when true, moves that lead to the same resulting
            position are collapsed and only the first one (in
            ``POSSIBLE_MOVES`` order) is kept.

    Returns:
        A tuple of moves, in ``POSSIBLE_MOVES`` order. Both the filtered and
        the unfiltered result are tuples, matching the annotation.
    """
    candidates = tuple(
        move
        for move in POSSIBLE_MOVES
        # position is reversed before indexing the board, as in the original lookup
        if self._board[move.position[::-1]] in (-1, player)
    )
    if not filter_duplicates:
        return candidates

    # Play every candidate on a throw-away copy and key it by the textual form
    # of the resulting game; dicts keep insertion order, so the first move that
    # reaches a given position is the one that survives.
    # NOTE(review): this relies on str(game) identifying the position — confirm
    # against CustomGame.__str__.
    first_by_outcome = {}
    for move in candidates:
        trial = deepcopy(self)
        trial._Game__move(*move, player)
        first_by_outcome.setdefault(str(trial), move)
    return tuple(first_by_outcome.values())
def score(self) -> int:
    """Heuristic value of the position for the player whose turn it is.

    Finished games are worth ``+5**5`` when the winner is the current player
    and ``-5**5`` otherwise. For unfinished games every row, column and both
    diagonals are scanned; each side is credited ``5 ** k`` where ``k`` is the
    largest number of its pieces found on a single line, and the result is
    player 0's credit minus player 1's, negated when player 1 is to move.

    Returns:
        The signed score as a plain ``int``.
    """
    winner = self.check_winner()
    if winner != -1:
        # The sign must cover the whole magnitude: ``(5**5) * 1 if c else -1``
        # parses as ``((5**5) * 1) if c else -1`` and scores a loss as only -1.
        return 5**5 if winner == self.current_player_idx else -(5**5)

    board = self._board
    lines = [
        *board,                      # rows
        *board.transpose(),          # columns
        board.diagonal(),            # main diagonal
        board[:, ::-1].diagonal(),   # anti-diagonal
    ]
    most_x = max(int((line == 0).sum()) for line in lines)
    most_o = max(int((line == 1).sum()) for line in lines)

    balance = 5**most_x - 5**most_o
    return balance if self.current_player_idx == 0 else -balance

if __name__ == "__main__":
from random import choice
Expand Down
23 changes: 22 additions & 1 deletion ci_quixo/helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import numpy as np
from typing import TYPE_CHECKING

from main import RandomPlayer, Game
from tqdm.auto import trange
if TYPE_CHECKING:
from game import Game
from main import Player

def pprint_board(game: "Game"):
board: np.ndarray = game.get_board()
Expand All @@ -15,3 +17,22 @@ def pprint_board(game: "Game"):
print(c, end="")
print()

def evaluate(p1: "Player", p2: "Player" = None, games: int = 10, display: bool = False) -> tuple[float, float, float]:
    """Play ``p1`` against ``p2`` and report ``p1``'s win rates.

    Seats alternate: in even-numbered games (0, 2, ...) ``p1`` moves first, in
    odd-numbered games it moves second. A game counts as won when
    ``Game.play`` returns the index of the seat ``p1`` occupied.

    Args:
        p1: the player being evaluated.
        p2: the opponent; a fresh ``RandomPlayer`` is used when omitted.
        games: number of games to play.
        display: when true, print the three rates as percentages.

    Returns:
        ``(overall, as_first, as_second)`` win rates in ``[0, 1]``. A rate whose
        denominator is zero (e.g. ``games == 0``, or ``as_second`` with a
        single game) is reported as ``0.0``.
    """
    if p2 is None:
        p2 = RandomPlayer()

    won_as_first, won_as_second = 0, 0
    for i in trange(games, desc="Evaluating player", unit="game"):
        game = Game()
        if i % 2 == 0:
            won_as_first += 1 if game.play(p1, p2) == 0 else 0
        else:
            won_as_second += 1 if game.play(p2, p1) == 1 else 0

    # Even indices are the "first seat" games, so with an odd total p1 gets one
    # more game as first than as second; dividing both by games/2 would skew
    # the rates (a single game could yield 200%).
    total = max(games, 0)
    games_as_first = (total + 1) // 2
    games_as_second = total // 2

    wins = (won_as_first + won_as_second) / total if total else 0.0
    won_as_first = won_as_first / games_as_first if games_as_first else 0.0
    won_as_second = won_as_second / games_as_second if games_as_second else 0.0

    if display:
        print(f"Total wins : {wins:.2%}")
        print(f"Wins as 1st: {won_as_first:.2%}")
        print(f"Wins as 2nd: {won_as_second:.2%}")
    return wins, won_as_first, won_as_second
100 changes: 57 additions & 43 deletions ci_quixo/minmax.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,63 +10,77 @@

class MinMaxPlayer(Player):
    """Player that chooses its move with a (optionally depth-limited) minimax search.

    The search alternates maximising plies (this player to move) and
    minimising plies (opponent to move), evaluating leaves with
    ``CustomGame.score()``. Alpha-beta cut-offs are applied only when
    ``use_alpha_beta_pruning`` is set.
    """

    def __init__(self, max_depth: int = None, use_alpha_beta_pruning: bool = False, verbose: bool = False) -> None:
        """Configure the search.

        Args:
            max_depth: ply at which positions are scored instead of expanded;
                ``None`` searches until a winner is found.
            use_alpha_beta_pruning: stop expanding a node once ``alpha > beta``.
            verbose: print the board and the chosen move while deciding.
        """
        super().__init__()

        self.max_depth = max_depth
        self.use_alpha_beta_pruning = use_alpha_beta_pruning
        self.verbose = verbose

    def make_move(self, game: Game) -> "CompleteMove":
        """Return the move to play on ``game``.

        Falls back to a random valid move when the search yields none.
        """
        cg = CustomGame.from_game(game)
        if self.verbose:
            print("Deciding move on the following board")
            cg.pprint()

        best_move = self._minmax(cg)
        if best_move is None:
            best_move = random.choice(cg.valid_moves(cg.get_current_player()))

        # The chosen move is applied to the working game `cg` so the verbose
        # output below can show the resulting board.
        # NOTE(review): assumes CustomGame.from_game returns a copy detached
        # from `game` — confirm.
        cg._Game__move(*best_move, cg.current_player_idx)
        if self.verbose:
            print(f"Played {best_move=}")
            cg.pprint()
        return best_move

    def _minmax(self, game: "CustomGame") -> "CompleteMove":
        """Search from ``game`` and return the root move with the best value.

        Returns ``None`` when no root move scores above ``-inf`` (for example
        when there are no valid moves).
        """
        best_move = None
        alpha, beta = -np.inf, np.inf

        for move in game.valid_moves(game.current_player_idx):
            child = self._play(game, move)
            # The reply belongs to the opponent, hence a minimising ply at depth 1.
            value = self._search(child, alpha, beta, 1, False)
            if value > alpha:
                alpha = value
                best_move = move

        if self.verbose:
            print(f"Found best move with score {alpha}")
        return best_move

    def _search(self, game: "CustomGame", alpha: float, beta: float, depth: int, maximizing: bool) -> float:
        """Return the minimax value of ``game`` from the root player's point of view.

        ``maximizing`` is true when the root player is the one to move in
        ``game``.
        """
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or game.check_winner() != -1:
            # score() is expressed for the side to move; on a minimising ply
            # that is the opponent, so flip it back to the root player's view.
            value = game.score()
            return value if maximizing else -value

        best = -np.inf if maximizing else np.inf
        for move in game.valid_moves(game.current_player_idx):
            # Recurse on the position *after* the move, not on `game` itself.
            child = self._play(game, move)
            value = self._search(child, alpha, beta, depth + 1, not maximizing)
            if maximizing:
                best = max(best, value)
                alpha = max(alpha, best)
            else:
                best = min(best, value)
                beta = min(beta, best)
            if self.use_alpha_beta_pruning and alpha > beta:
                break
        return best

    @staticmethod
    def _play(game: "CustomGame", move: "CompleteMove") -> "CustomGame":
        """Return a copy of ``game`` with ``move`` played and the turn passed on."""
        child = deepcopy(game)
        # The move is performed outside of an `assert` so it still happens when
        # Python runs with -O; the failure type is kept as AssertionError.
        if not child._Game__move(*move, child.current_player_idx):
            raise AssertionError("Somehow move was invalid?????")
        child.current_player_idx = 1 - child.current_player_idx
        return child


if __name__ == "__main__":
# Script entry point: runs a quick evaluation of a depth-3 MinMaxPlayer with
# alpha-beta pruning enabled. The call below passes None as opponent, 10 as
# the number of games and True to print the results.
# NOTE(review): `mm`, `rp` and `game` are not used by the evaluate() call.
from main import RandomPlayer
mm = MinMaxPlayer(20, True)
rp = RandomPlayer()
game = Game()
from helper import evaluate
evaluate(MinMaxPlayer(3, True), None, 10, True)
22 changes: 21 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ readme = "README.md"
python = "^3.12"
numpy = "^1.26.3"
pytest = "^7.4.4"
tqdm = "^4.66.1"


[build-system]
Expand Down

0 comments on commit 8258b4f

Please sign in to comment.