games/tictactoe.py

from __future__ import annotations

import typing as tp
from dataclasses import dataclass
from functools import cache, cached_property

# Mark of one of the two players.
Player = tp.Literal["X", "O"]
# Result value used for a finished game that nobody won.
Draw = tp.Literal["-"]
# Content of a single square; " " means the square is empty.
Square = tp.Literal["X", "O", " "]
# The nine squares in row-major order (index 0 is top-left, 8 is bottom-right).
Board = tuple[Square, Square, Square, Square, Square, Square, Square, Square, Square]


@dataclass(frozen=True)
class GameState:
    """Immutable (hashable) snapshot of a game: whose turn it is and the board."""

    # Player who moves next from this position.
    player: Player
    # Nine squares in row-major order.
    board: Board

    def __str__(self):
        """Render the board as a 3x3 grid drawn with box-drawing characters."""
        cells = self.board
        rows = [
            " {} │ {} │ {}".format(cells[start], cells[start + 1], cells[start + 2])
            for start in (0, 3, 6)
        ]
        # The horizontal rule sits between consecutive rows only.
        return "\n───┼───┼───\n".join(rows)


@dataclass(frozen=True)
class Move:
    """A single placement: which mark is written into which square."""

    # Mark that apply_move writes into the chosen square.
    player: Player
    # Index into the board tuple (get_valid_moves produces values 0-8).
    position: int


def get_valid_moves(state: GameState) -> tuple[Move, ...]:
    """Return one move per empty square for the player whose turn it is.

    The result is ordered by board index and may be empty (full board).
    This function does not check whether the game is already decided;
    callers that care must consult ``get_winner`` themselves.
    """
    # NOTE: the return type is a variable-length tuple; ``tuple[Move]`` would
    # describe a tuple of exactly one element.
    return tuple(
        Move(player=state.player, position=index)
        for index, square in enumerate(state.board)
        if square == " "
    )


@cache
def apply_move(state: GameState, move: Move) -> GameState:
    """Return the state obtained by playing ``move`` on ``state``.

    ``state`` itself is not modified. The square at ``move.position`` is
    overwritten with ``move.player`` without any legality check, and the turn
    in the returned state goes to the opposite of ``move.player``.
    """
    squares = [*state.board]
    squares[move.position] = move.player

    # The next player is derived from the move, not from state.player.
    if move.player == "O":
        next_to_move = "X"
    else:
        next_to_move = "O"

    return GameState(player=next_to_move, board=tuple(squares))


@cache
def get_winner(state: GameState) -> Player | Draw | None:
    """Classify ``state`` as won, drawn or still undecided.

    Returns the mark filling a complete row, column or diagonal (rows are
    checked first, then columns, then diagonals), ``"-"`` when no such line
    exists and no square is empty, and ``None`` otherwise.
    """
    # Board layout:
    #   a b c
    #   d e f
    #   g h i
    a, b, c, d, e, f, g, h, i = state.board

    triples = (
        # rows
        (a, b, c),
        (d, e, f),
        (g, h, i),
        # columns
        (a, d, g),
        (b, e, h),
        (c, f, i),
        # diagonals
        (a, e, i),
        (c, e, g),
    )
    for first, second, third in triples:
        if first != " " and first == second == third:
            return first

    # Full board without a completed line: draw.
    if " " not in state.board:
        return "-"

    # Empty squares remain and nobody has a line yet.
    return None


# Game-tree search. Despite the original "monte-carlo style" label, get_score
# below visits every successor state and takes max/min of the results; nothing
# is sampled.

# Initial position: empty board, "X" moves first.
START = GameState(player="X", board=(" ",) * 9)

# Memoization ("dynamic programming") comes from functools.cache on the search
# functions; the commented-out table below is presumably the earlier
# hand-rolled version.
# next_states: dict[GameState, tuple[GameState]] = {}
# Incremented by get_score every time its body runs (cache hits do not count).
total_nodes = 0


@cache
def get_next_states(state: GameState) -> tuple[GameState, ...]:
    """Return every state reachable from ``state`` in exactly one move.

    A finished game (someone has won, or it is a draw) has no successors, so
    the result is empty in that case. Otherwise there is one successor per
    empty square, in board-index order.
    """
    # NOTE: the return type is a variable-length tuple; ``tuple[GameState]``
    # would describe a tuple of exactly one element.
    if get_winner(state) is not None:
        return ()
    return tuple(apply_move(state, move) for move in get_valid_moves(state))


@cache
def get_score(state: GameState, target: Player) -> float:
    """Return the minimax value of ``state`` from ``target``'s point of view.

    Finished games score ``+inf`` when ``target`` has won, ``-inf`` when the
    other player has won and ``0`` for a draw. An unfinished game takes the
    maximum of its successors' scores when ``target`` is to move and the
    minimum when the other player is to move.

    Side effect: bumps the module-level ``total_nodes`` counter each time the
    body runs (results served from the cache do not count).
    """
    global total_nodes
    total_nodes += 1

    outcome = get_winner(state)
    if outcome == target:
        return float("inf")
    if outcome == "-":
        return 0
    rival = "O" if target != "O" else "X"
    if outcome == rival:
        return float("-inf")

    # Anything left over must be an unfinished game.
    assert outcome is None

    children = get_next_states(state)
    # target maximises on its own turn; the other player minimises.
    choose = max if state.player == target else min
    return choose(get_score(child, target) for child in children)


if __name__ == "__main__":
    # TODO: "get best move"
    # Score the opening position from X's point of view, then report how many
    # times the body of get_score actually ran.
    print(f"{START=}, {get_score(START, 'X')=}")
    print(f"{total_nodes=}")
tic tac toe monte carlo style 2023-06-01 07:58:45 +00:00			`from __future__ import annotations`

			`import typing as tp`
			`from dataclasses import dataclass`
			`from functools import cache, cached_property`

			`Player = tp.Literal["X", "O"]`
			`Draw = tp.Literal["-"]`
			`Square = tp.Literal["X", "O", " "]`
			`Board = tuple[Square, Square, Square, Square, Square, Square, Square, Square, Square]`


			`@dataclass(frozen=True)`
			`class GameState:`
			`player: Player`
			`board: Board`

			`def __str__(self):`
			`b = self.board`
			`return (`
			`f" {b[0]} │ {b[1]} │ {b[2]}\n"`
			`"───┼───┼───\n"`
			`f" {b[3]} │ {b[4]} │ {b[5]}\n"`
			`"───┼───┼───\n"`
			`f" {b[6]} │ {b[7]} │ {b[8]}"`
			`)`


			`@dataclass(frozen=True)`
			`class Move:`
			`player: Player`
			`position: int`


			`def get_valid_moves(state: GameState) -> tuple[Move]:`
			`return tuple(`
			`Move(state.player, position=i)`
			`for i, square in enumerate(state.board)`
			`if square == " "`
			`)`


			`@cache`
			`def apply_move(state: GameState, move: Move) -> GameState:`
			`new_player = "X" if move.player == "O" else "O"`

			`new_board = list(state.board)`
			`new_board[move.position] = move.player`
			`new_board = tuple(new_board)`

			`return GameState(player=new_player, board=new_board)`


dynamic programming 2023-06-01 08:17:35 +00:00			`@cache`
tic tac toe monte carlo style 2023-06-01 07:58:45 +00:00			`def get_winner(state: GameState) -> Player \| Draw \| None:`
			`# abc`
			`# def`
			`# ghi`
			`a, b, c, d, e, f, g, h, i = state.board`

			`# horizontal`
			`if a == b == c and a != " ":`
			`return a`
			`if d == e == f and d != " ":`
			`return d`
			`if g == h == i and g != " ":`
			`return g`

			`# vertical`
			`if a == d == g and a != " ":`
			`return a`
			`if b == e == h and b != " ":`
			`return b`
			`if c == f == i and c != " ":`
			`return c`

			`# diagonal`
			`if a == e == i and a != " ":`
			`return a`
			`if c == e == g and c != " ":`
			`return c`

			`# draw`
			`if not any(square == " " for square in state.board):`
			`return "-"`

			`# no winner`
			`return None`


			`# monte-carlo style tree search`

			`START = GameState(player="X", board=(" ",) * 9)`

remove non-dynamic programming approach 2023-06-01 08:19:52 +00:00			`# with dynamic programming B)`
			`# next_states: dict[GameState, tuple[GameState]] = {}`
			`total_nodes = 0`
tic tac toe monte carlo style 2023-06-01 07:58:45 +00:00

remove non-dynamic programming approach 2023-06-01 08:19:52 +00:00			`@cache`
			`def get_next_states(state: GameState) -> tuple[GameState]:`
			`if get_winner(state) is not None:`
			`return tuple()`
			`return tuple(apply_move(state, move) for move in get_valid_moves(state))`
tic tac toe monte carlo style 2023-06-01 07:58:45 +00:00

remove non-dynamic programming approach 2023-06-01 08:19:52 +00:00			`@cache`
			`def get_score(state: GameState, target: Player) -> float:`
			`global total_nodes`
			`total_nodes += 1`
			`opponent = "X" if target == "O" else "O"`
			`winner = get_winner(state)`
			`if winner == target:`
			`return float("inf")`
			`if winner == "-":`
			`return 0`
			`if winner == opponent:`
			`return float("-inf")`

			`assert winner is None`
			`next_scores = (`
			`get_score(next_state, target) for next_state in get_next_states(state)`
			`)`
			`if state.player == target:`
			`# target player plays at this node`
			`return max(next_scores)`
			`else:`
			`# opponent plays at this node`
			`return min(next_scores)`
he doesnt know about dynamic programming :) 2023-06-01 08:05:25 +00:00

tic tac toe monte carlo style 2023-06-01 07:58:45 +00:00			`if __name__ == "__main__":`
remove non-dynamic programming approach 2023-06-01 08:19:52 +00:00			`# TODO: "get best move"`
			`print(f"{START=}, {get_score(START, 'X')=}")`
			`print(f"{total_nodes=}")`