# TSP_env.py
import gym
import networkx as nx
import numpy as np
import pygame
import torch
from gym import spaces
from scipy.spatial.distance import euclidean
class TSP(gym.Env):
    """Travelling-salesman environment on a randomly weighted complete graph.

    An episode starts at node 0. Each action is the index of the next node
    to visit; the reward is the negative edge weight between the current
    node and the chosen one. The episode is done once every node has been
    marked as visited.

    Attributes:
        size: Number of nodes.
        adjacency_matrix: ``(size, size)`` array of edge weights with a
            zero diagonal, re-sampled on every reset.
        possible_action: Per-node mask; ``1`` for an unvisited node and
            ``coeff`` for a visited one.
        path: Node indices in visiting order, starting with ``0``.
        path_graph: networkx graph holding the edges travelled so far.

    NOTE(review): ``observation_space`` is a ``(size, size)`` Box that is
    used to sample edge weights, whereas ``_get_obs`` returns a dict with a
    flattened matrix and a mask. The two do not match; confirm whether
    callers rely on ``observation_space`` describing the observations.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, size=6):
        """Create the environment and sample a first problem instance.

        Args:
            size: Number of nodes in the graph.
        """
        self.size = size
        # Edge weights are drawn from this Box, i.e. from [1, 100].
        self.observation_space = spaces.Box(
            low=np.ones((self.size, self.size)),
            high=np.ones((self.size, self.size)) * 100,
        )
        # Multiplying by this zeroes the diagonal (no self-distance).
        self.mult = np.ones((self.size, self.size)) - np.eye(self.size)
        self.action_space = spaces.Discrete(self.size)
        # Large negative value that marks a node as visited in the mask.
        self.coeff = -10**8
        self._new_episode()
        # Node positions are computed once and reused by ``render``.
        self.poss = nx.spring_layout(self.path_graph.nodes)

    def _new_episode(self):
        """Reset path, mask and path graph, and sample new edge weights."""
        self.path = [0]
        self.possible_action = [1] * self.size
        self.possible_action[0] = self.coeff
        # A Turan graph with a single partition has ``size`` nodes and no
        # edges; edges are added as the agent moves.
        self.path_graph = nx.turan_graph(self.size, 1)
        self.adjacency_matrix = self.observation_space.sample() * self.mult

    def action_map(self, action):
        """Return ``action`` unchanged (hook for remapping actions)."""
        return action

    def graph_gen(self):
        """Build a complete graph weighted by spring-layout distances.

        Returns:
            A networkx complete graph on ``size`` nodes whose ``weight``
            edge attribute is the Euclidean distance between the
            endpoints' spring-layout positions.
        """
        G = nx.complete_graph(self.size)
        pos = nx.spring_layout(G)
        for a, b in G.edges:
            G[a][b]["weight"] = euclidean(pos[a], pos[b])
        return G

    def update_graph(self):
        """Add every edge of the travelled path to ``path_graph``.

        Returns:
            The current adjacency (edge-weight) matrix.
        """
        # NOTE(review): intent inferred; this method previously could not
        # run at all. Confirm this is the behaviour callers expect.
        self.path_graph.add_edges_from(zip(self.path, self.path[1:]))
        return self.adjacency_matrix

    def _get_obs(self):
        """Return the observation dict.

        Returns:
            ``{"adjacency_matrix": <flattened weights>, "mask": <tensor>}``
            where the mask is ``possible_action`` as a torch tensor.
        """
        return {
            "adjacency_matrix": self.adjacency_matrix.flatten(),
            "mask": torch.tensor(self.possible_action),
        }

    def _get_info(self, action):
        """Return the edge weight from the last node on the path to ``action``.

        Must be called before ``action`` is appended to ``self.path``;
        afterwards the lookup hits the zero diagonal.
        """
        return {"distance": self.adjacency_matrix[self.path[-1]][action]}

    def reset(self, seed=None, return_info=False, options=None):
        """Start a new episode on a freshly sampled graph.

        Args:
            seed: Optional seed. It is forwarded to ``gym.Env.reset`` and
                also used to seed ``observation_space``, because the edge
                weights are drawn from that space's own generator.
            return_info: If true, return ``(observation, info)`` instead of
                just the observation.
            options: Unused.

        Returns:
            The initial observation, or ``(observation, info)`` when
            ``return_info`` is true.
        """
        super().reset(seed=seed)
        if seed is not None:
            self.observation_space.seed(seed)
        self._new_episode()
        observation = self._get_obs()
        info = self._get_info(0)
        return (observation, info) if return_info else observation

    def step(self, action):
        """Move to node ``action``.

        Args:
            action: Index of the node to visit next.

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            negative weight of the travelled edge, ``done`` is true once
            every node is marked visited, and ``info["distance"]`` is the
            weight of the travelled edge.
        """
        action = self.action_map(action)
        previous = self.path[-1]
        # Read the travelled distance before extending the path; after the
        # append ``path[-1] == action`` and the lookup would always be 0.
        info = self._get_info(action)
        reward = -info["distance"]

        self.path_graph.add_edge(previous, action)
        self.possible_action[action] = self.coeff
        self.path.append(action)

        # Every mask entry is either 1 or ``coeff``, so the sum reaches
        # ``coeff * size`` only when all nodes have been visited.
        done = bool(np.sum(self.possible_action) <= self.coeff * self.size)
        observation = self._get_obs()
        return observation, reward, done, info

    def render(self, mode="human"):
        """Draw the travelled path or return its adjacency array.

        Args:
            mode: ``"human"`` draws ``path_graph`` with networkx; any other
                value returns ``path_graph`` as a numpy adjacency array.
        """
        if mode == "human":
            nx.draw(self.path_graph, pos=self.poss)
            return None
        return nx.to_numpy_array(self.path_graph)

    def close(self):
        """Nothing to release."""
        return None