-
Notifications
You must be signed in to change notification settings - Fork 1
/
Chess_env.py
215 lines (130 loc) · 8.8 KB
/
Chess_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import numpy as np
from degree_freedom_queen import *
from degree_freedom_king1 import *
from degree_freedom_king2 import *
from generate_game import *
class Chess_Env:
def __init__(self,N_grid):
self.N_grid=N_grid # SIZE OF THE BOARD
self.Board=np.zeros([N_grid,N_grid]) # THE BOARD, THIS WILL BE FILLED BY 0 (NO PIECE), 1 (AGENT'S KING), 2 (AGENT'S QUEEN), 3 (OPPONENT'S KING)
self.p_k1=np.zeros([2,1]) # POSITION OF THE AGENT'S KING AS COORDINATES
self.p_k2=np.zeros([2,1]) # POSITION OF THE OPPOENT'S KING AS COORDINATES
self.p_q1=np.zeros([2,1]) # POSITION OF THE AGENT'S QUEEN AS COORDINATES
self.dfk1=np.zeros([N_grid,N_grid]) # ALL POSSIBLE ACTIONS FOR THE AGENT'S KING (LOCATIONS WHERE IT CAN MOVE WITHOUT THE PRESENCE OF THE OTHER PIECES)
self.dfk2=np.zeros([N_grid,N_grid]) # ALL POSSIBLE ACTIONS FOR THE OPPONENT'S KING (LOCATIONS WHERE IT CAN MOVE WITHOUT THE PRESENCE OF THE OTHER PIECES)
self.dfq1=np.zeros([N_grid,N_grid]) # ALL POSSIBLE ACTIONS FOR THE AGENT'S QUEEN (LOCATIONS WHERE IT CAN MOVE WITHOUT THE PRESENCE OF THE OTHER PIECES)
self.dfk1_constrain=np.zeros([N_grid,N_grid]) # ALLOWED ACTIONS FOR THE AGENT'S KING CONSIDERING ALSO THE OTHER PIECES
self.dfk2_constrain=np.zeros([N_grid,N_grid]) # ALLOWED ACTIONS FOT THE OPPONENT'S KING CONSIDERING ALSO THE OTHER PIECES
self.dfq1_constrain=np.zeros([N_grid,N_grid]) # ALLOWED ACTIONS FOT THE AGENT'S QUEEN CONSIDERING ALSO THE OTHER PIECES
self.ak1=np.zeros([8]) # ALLOWED ACTIONS OF THE AGENT'S KING (CONSIDERING OTHER PIECES), ONE-HOT ENCODED
self.possible_king_a=np.shape(self.ak1)[0] # TOTAL NUMBER OF POSSIBLE ACTIONS FOR AGENT'S KING
self.aq1=np.zeros([8*(self.N_grid-1)]) # ALLOWED ACTIONS OF THE AGENT'S QUEEN (CONSIDERING OTHER PIECES), ONE-HOT ENCODED
self.possible_queen_a=np.shape(self.aq1)[0] # TOTAL NUMBER OF POSSIBLE ACTIONS FOR AGENT'S QUEEN
self.check=0 # 1 (0) IF ENEMY KING (NOT) IN CHECK
# THIS MAP IS USEFUL FOR US TO UNDERSTAND THE DIRECTION OF MOVEMENT GIVEN THE ACTION MADE (SKIP...)
self.map=np.array([[1, 0],
[-1, 0],
[0, 1],
[0, -1],
[1, 1],
[1, -1],
[-1, 1],
[-1, -1]])
def Initialise_game(self):
# START THE GAME BY SETTING PIECIES
self.Board,self.p_k2,self.p_k1,self.p_q1=generate_game(self.N_grid)
# Allowed actions for the agent's king
self.dfk1_constrain, self.a_k1, self.dfk1 = degree_freedom_king1(self.p_k1, self.p_k2, self.p_q1, self.Board)
# Allowed actions for the agent's queen
self.dfq1_constrain, self.a_q1, self.dfq1 = degree_freedom_queen(self.p_k1, self.p_k2, self.p_q1, self.Board)
# Allowed actions for the enemy's king
self.dfk2_constrain, self.a_k2, self.check = degree_freedom_king2(self.dfk1, self.p_k2, self.dfq1, self.Board, self.p_k1)
# ALLOWED ACTIONS FOR THE AGENT, ONE-HOT ENCODED
allowed_a=np.concatenate([self.a_q1,self.a_k1],0)
# FEATURES (INPUT TO NN) AT THIS POSITION
X=self.Features()
return self.Board, X, allowed_a
def OneStep(self,a_agent):
# SET REWARD TO ZERO IF GAME IS NOT ENDED
R=0
# SET Done TO ZERO (GAME NOT ENDED)
Done=0
# PERFORM THE AGENT'S ACTION ON THE CHESS BOARD
if a_agent < self.possible_queen_a: # THE AGENT MOVED ITS QUEEN
# UPDATE QUEEN'S POSITION
direction = int(np.ceil((a_agent + 1) / (self.N_grid - 1))) - 1
steps = a_agent - direction * (self.N_grid - 1) + 1
self.Board[self.p_q1[0], self.p_q1[1]] = 0
mov = self.map[direction, :] * steps
self.Board[self.p_q1[0] + mov[0], self.p_q1[1] + mov[1]] = 2
self.p_q1[0] = self.p_q1[0] + mov[0]
self.p_q1[1] = self.p_q1[1] + mov[1]
else: # THE AGENT MOVED ITS KING
# UPDATE KING'S POSITION
direction = a_agent - self.possible_queen_a
steps = 1
self.Board[self.p_k1[0], self.p_k1[1]] = 0
mov = self.map[direction, :] * steps
self.Board[self.p_k1[0] + mov[0], self.p_k1[1] + mov[1]] = 1
self.p_k1[0] = self.p_k1[0] + mov[0]
self.p_k1[1] = self.p_k1[1] + mov[1]
# COMPUTE THE ALLOWED ACTIONS AFTER AGENT'S ACTION
# Allowed actions for the agent's king
self.dfk1_constrain, self.a_k1, self.dfk1 = degree_freedom_king1(self.p_k1, self.p_k2, self.p_q1, self.Board)
# Allowed actions for the agent's queen
self.dfq1_constrain, self.a_q1, self.dfq1 = degree_freedom_queen(self.p_k1, self.p_k2, self.p_q1, self.Board)
# Allowed actions for the enemy's king
self.dfk2_constrain, self.a_k2, self.check = degree_freedom_king2(self.dfk1, self.p_k2, self.dfq1, self.Board, self.p_k1)
# CHECK IF POSITION IS A CHECMATE, DRAW, OR THE GAME CONTINUES
# CASE OF CHECKMATE
if np.sum(self.dfk2_constrain) == 0 and self.dfq1[self.p_k2[0], self.p_k2[1]] == 1:
# King 2 has no freedom and it is checked
# Checkmate and collect reward
Done = 1 # The epsiode ends
R = 1 # Reward for checkmate
allowed_a=[] # Allowed_a set to nothing (end of the episode)
X=[] # Features set to nothing (end of the episode)
# CASE OF DRAW
elif np.sum(self.dfk2_constrain) == 0 and self.dfq1[self.p_k2[0], self.p_k2[1]] == 0:
# King 2 has no freedom but it is not checked
Done = 1 # The epsiode ends
R = 0. # Reward for draw
allowed_a=[] # Allowed_a set to nothing (end of the episode)
X=[] # Features set to nothing (end of the episode)
# THE GAME CONTINUES
else:
# THE OPPONENT MOVES THE KING IN A RANDOM SAFE LOCATION
allowed_enemy_a = np.where(self.a_k2 > 0)[0]
a_help = int(np.ceil(np.random.rand() * allowed_enemy_a.shape[0]) - 1)
a_enemy = allowed_enemy_a[a_help]
direction = a_enemy
steps = 1
self.Board[self.p_k2[0], self.p_k2[1]] = 0
mov = self.map[direction, :] * steps
self.Board[self.p_k2[0] + mov[0], self.p_k2[1] + mov[1]] = 3
self.p_k2[0] = self.p_k2[0] + mov[0]
self.p_k2[1] = self.p_k2[1] + mov[1]
# COMPUTE THE ALLOWED ACTIONS AFTER THE OPPONENT'S ACTION
# Possible actions of the King
self.dfk1_constrain, self.a_k1, self.dfk1 = degree_freedom_king1(self.p_k1, self.p_k2, self.p_q1, self.Board)
# Allowed actions for the agent's king
self.dfq1_constrain, self.a_q1, self.dfq1 = degree_freedom_queen(self.p_k1, self.p_k2, self.p_q1, self.Board)
# Allowed actions for the enemy's king
self.dfk2_constrain, self.a_k2, self.check = degree_freedom_king2(self.dfk1, self.p_k2, self.dfq1, self.Board, self.p_k1)
# ALLOWED ACTIONS FOR THE AGENT, ONE-HOT ENCODED
allowed_a=np.concatenate([self.a_q1,self.a_k1],0)
# FEATURES
X=self.Features()
return self.Board, X, allowed_a, R, Done
# DEFINITION OF THE FEATURES (SEE ALSO ASSIGNMENT DESCRIPTION)
def Features(self):
s_k1 = np.array(self.Board == 1).astype(float).reshape(-1) # FEATURES FOR KING POSITION
s_q1 = np.array(self.Board == 2).astype(float).reshape(-1) # FEATURES FOR QUEEN POSITION
s_k2 = np.array(self.Board == 3).astype(float).reshape(-1) # FEATURE FOR ENEMY'S KING POSITION
check=np.zeros([2]) # CHECK? FEATURE
check[self.check]=1
K2dof=np.zeros([8]) # NUMBER OF ALLOWED ACTIONS FOR ENEMY'S KING, ONE-HOT ENCODED
K2dof[np.sum(self.dfk2_constrain).astype(int)]=1
# ALL FEATURES...
x = np.concatenate([s_k1, s_q1, s_k2, check, K2dof],0)
return x