feat: simulate state will now correctly provide any board state in connect4 represented as a string.
CogitoNTNU · Apr 27, 2024 · e652b0a · e652b0a
1 parent 81046d8
commit e652b0a
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 16 deletions.
diff --git a/src/alphazero/debug/simulate_state.py b/src/alphazero/debug/simulate_state.py
@@ -5,6 +5,9 @@
 from src.utils.game_context import GameContext
 from icecream import ic
 
+def replace_char(s: str, ch: str, i: int) -> str: 
+    return s[:i] + ch + s[i + 1:]
+
 def string_to_state(game, board_string: str):
     """
     This function is not perfect and will not work for many board states.
@@ -17,18 +20,57 @@ def string_to_state(game, board_string: str):
         raise ValueError("Invalid board string")
 
     while 'x' in flatten or 'o' in flatten:
-        for i in reversed(range(len(rows))):
-            index = rows[i].find('x')
-            if index != -1:
-                state.apply_action(index)
-                rows[i] = rows[i].replace('x', '.', 1) # Remove the piece
+
+        while 'x' in flatten:
+
+            piece_played = False
+
+            for i in reversed(range(1, len(rows))):
+
+                index = rows[i].find('x')
+                if index != -1:
+                    if rows[i - 1][index] == 'o':
+                        state.apply_action(index)
+                        rows[i] = replace_char(rows[i], '.', index)
+                        piece_played = True
+                        break
+
+            if piece_played:
                 break
-        for i in reversed(range(len(rows))):
-            index = rows[i].find('o')
-            if index != -1:
-                state.apply_action(index)
-                rows[i] = rows[i].replace('o', '.', 1) # Remove the piece
+
+            for i in reversed(range(len(rows))):
+                index = rows[i].find('x')
+                if index != -1:
+                    state.apply_action(index)
+                    rows[i] = replace_char(rows[i], '.', index)
+                    break
+            break
+
+        while 'o' in flatten:
+
+            piece_played = False
+
+            for i in reversed(range(1, len(rows))):
+
+                index = rows[i].find('o')
+                if index != -1:
+                    if rows[i - 1][index] == 'x':
+                        state.apply_action(index)
+                        rows[i] = replace_char(rows[i], '.', index)
+                        piece_played = True
+                        break
+
+            if piece_played:
                 break
+
+            for i in reversed(range(len(rows))):
+                index = rows[i].find('o')
+                if index != -1:
+                    state.apply_action(index)
+                    rows[i] = replace_char(rows[i], '.', index)
+                    break
+            break
+
         flatten = ''.join(rows)
     return state
 
@@ -40,15 +82,24 @@ def main():
     context = GameContext(game_str, nn, save_path)
 
     alphazero = AlphaZero(context)
-    board_string = """
+
+    initial_string ="""
+.......
+.......
+.......
 .......
 .......
 .......
-...o...
-x..o...
-xx.ooxx
     """
-    state = string_to_state(context.game, board_string)
+    board_string = """
+.......
+.......
+...x...
+o..oox.
+x..oxxo
+xxoooxx
+    """
+    state = string_to_state(context.game, initial_string)
     ic(state)
     result = alphazero.run_simulation(state)
     print("Simulation result:", result)

diff --git a/src/alphazero/node.py b/src/alphazero/node.py
@@ -10,7 +10,7 @@ def __init__(self, parent: "Node", state: pyspiel.State, action: int, policy_val
         """
         A list of game states you can reach from the current node.
         """
-        # TODO: ASSIGN TORCH TENSOR WITH POLICY VALUES TO CHILDREN
+
         self.parent: 'Node' = parent
         """
         The node representing the state which came before the current node.