ConnectN/alpha_beta_agent.py at main · T-Goon/ConnectN · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import agent
import board

###########################
# Alpha-Beta Search Agent #
###########################

class AlphaBetaAgent(agent.Agent):
    """Agent that uses alpha-beta search"""

    # Class constructor.
    #
    # PARAM [string] name:      the name of this player
    # PARAM [int]    max_depth: the maximum search depth
    def __init__(self, name, max_depth):
        super().__init__(name)
        # Max search depth
        self.max_depth = max_depth
        self.col_order = None

    # Calculates the column order given the board width, and memoizes the result.
    # PARAM [int] width: The board width
    def cache_col_order(self, width):
        # Do not recompute if not needed
        if self.col_order != None and self.col_order == width:
            return
        self.col_order = list()
        # Count down from the middle (rounded up)
        for i in range((width + 1) >> 1, 0, -1):
            # Compute left and right values and add to list if not present.
            self.col_order.append(i - 1)
            v = (width + 1) - i - 1
            if v in self.col_order:
                continue
            self.col_order.append(v)

    # Pick a column.
    #
    # PARAM [board.Board] brd: the current board state
    # RETURN [int]: the column where the token must be added
    #
    # NOTE: make sure the column is legal, or you'll lose the game.
    def go(self, brd):
        """Search for the best move (choice of column for the token)"""
        self.cache_col_order(brd.w)

        # Negamax
        v, action = self.negamax(brd, float('-inf'), float('inf'), None, 1, brd.player)

        return action

    # Gets the score of a non-terminal board.
     # PARAM [board.Board] board: The current state of the board.
     # PARAM [int] player: The value of a player's token. [1|2]
     # PARAM [int] other: The value of the opponent's token. [1|2]
     # RETURN [int]: Summed score for tokens t on the board.
    def get_board_score(self, board, player, other):
        """Caluclate a score for the entire board. """

        # calc for both you and oponent
        sum = 0
        sum += self.count_all(board, player)
        sum -= self.count_all(board, other)

        return sum

    # Gets the score of all tokens on the board.
     # PARAM [board.Board] board: The current state of the board.
     # PARAM [int] t: The value of a player's token. [1|2]
     # RETURN [int]: Summed score for tokens t on the board.
    def count_all(self, board, t):
        """ Get the sum of all tokens on the board """

        sum = 0
        # Go through all spaces on the board
        for i in range(board.h):
            for j in range(board.w):
                # only eval for the specified token
                if(board.board[i][j] == t):
                    sum += self.tokenScore(board, j, i, t)

        return sum

    # Gets the score of a token in all directions.
     # PARAM [board.Board] board: The current state of the board.
     # PARAM [int] x: The column of the token on the board.
     # PARAM [int] y: The row of the token on the board.
     # PARAM [int] t: The value of a player's token. [1|2]
     # RETURN [int]: Summed score for all 8 directions of a token.
    def tokenScore(self, board, x, y, t):
        """ Look in all directions for 0 tokens and t tokens """

        # Look in all directions for 0 tokens and t tokens
        sum = 0
        sum += self.countLine(board, x, y, 0, 1, t) # up
        sum += self.countLine(board, x, y, 1, 1, t) # diagonal up right
        sum += self.countLine(board, x, y, 1, 0, t) # right
        sum += self.countLine(board, x, y, 1, -1, t) # diagonal down right
        sum += self.countLine(board, x, y, 0, -1, t) # down
        sum += self.countLine(board, x, y, -1, -1, t) # diagonal down left
        sum += self.countLine(board, x, y, -1, 0, t) # left
        sum += self.countLine(board, x, y, -1, 1, t) # diagonal up left

        return sum

    # Gets the score of a token in one direction.
     # PARAM [board.Board] board: The current state of the board.
     # PARAM [int] x: The column of the token on the board.
     # PARAM [int] y: The row of the token on the board.
     # PARAM [int] dx: The x direction to look in. [-1|0|1]
     # PARAM [int] dy: The y direction to look in. [-1|0|1]
     # PARAM [int] t: The value of a player's token. [1|2]
     # RETURN [int]: Summed score for a single direction of a token.
    def countLine(self, board, x, y, dx, dy, t):
        """ Count the score of a token in one direction """
        sum = 0
        b = board.board

        for i in range(1, board.n):
            try:
                row = y+(dy*i)
                col = x+(dx*i)
                # stop python from using negetive indexes
                if(row < 0 or col < 0):
                    return 0

                if (t == b[row][col]): # Found your token
                    sum += 2
                elif (0 == b[row][col]): # Found no token
                    sum += 1
                else:
                    # discard when you encounter a oposing token
                    return 0

            except Exception:
                # discard when you go out of bounds
                return 0

        return sum

    # Calculates the optimal move and value of the given board for the given player.
    # PARAM [board.Board] board: The board state to check.
    # PARAM [int] a: The alpha value for pruning
    # PARAM [int] b: The beta value for pruning
    # PARAM [int] old_action: The column played to reach this board state
    # PARAM [int] player: The player to run the search for [1|2]
    def negamax(self, board, a, b, old_action, depth, player):
        # Cache board outcome, since get_outcome is somewhat expensive to calculate.
        winner = board.get_outcome()
        other_player = (player % 2) + 1
        successors = self.get_successors(board)

        # Immediate win or loss is calculated here to avoid recomputing outcome.
        # It could also be done in get_board_score.

        # Check for immediate win
        if winner == player:
            return 1000000 / depth, old_action
        # Check for immediate loss
        if winner == other_player:
            return -1000000 / depth, old_action
        # Max depth, or no successors (tie)
        if depth == self.max_depth or len(successors) == 0:
            return self.get_board_score(board, player, other_player), old_action

        # Standard negamax implementation
        value = float('-inf')
        action = None
        for next_board in self.get_successors(board):
            # It is notable that `nv` is negated every time it is used, this is a key property of negamax.
            nv, new_action = self.negamax(next_board[0], -b, -a, next_board[1], depth + 1, other_player)

            if -nv > value:
                value = -nv
                action = next_board[1]

            a = max(a, value)

            if a >= b:
                return value, new_action

        return value, action

    # Get the successors of the given board in order of middle outwards.
    # PARAM [board.Board] brd: the board state
    # RETURN [list of (board.Board, int)]: a list of the successor boards,
    #                                      along with the column where the last
    #                                      token was added in it
    def get_successors(self, brd):
        """Returns the reachable boards from the given board brd. The return value is a tuple (new board state, column number where last token was added)."""
        # Get possible actions (middle favoring)
        freecols = [x for x in self.col_order if brd.board[-1][x] == 0 ]

        # Are there legal actions left?
        if not freecols:
            return []

        # Make a list of the new boards along with the corresponding actions
        succ = []
        # print(freecols)
        for col in freecols:
            # Clone the original board
            nb = board.Board([row[:] for row in brd.board], brd.w, brd.h, brd.n)
            nb.player = brd.player

            # Add a token to the new board
            # (This internally changes nb.player, check the method definition!)
            nb.add_token(col)
            # Add board to list of successors
            succ.append((nb,col))
        return succ

THE_AGENT = AlphaBetaAgent("Group20", 6)