# -*- coding: utf-8 -*-
"""
Created on Sun Sep 20 14:58:41 2020

@author: pheno and baltundas3

Integrated Human Model
Utility functions

1. Replace floyd_warshall with Johnson's algorithm for STN preprocessing
"""
import copy
import random
from collections import Counter, namedtuple

import dgl
import networkx as nx
import numpy as np
import torch

from benchmark.JohnsonUltra import johnsonU
from env.mrc_problem import MRCProblem
from env.scheduling_env import SchedulingEnv
from env.hybrid_team import HybridTeam
from env.multi_round_scheduling_env import MultiRoundSchedulingEnv
def build_hetgraph(halfDG, num_tasks, num_robots, num_humans, dur,
partials, unsch_tasks):
"""
Helper function for building HetGraph
Q nodes are built w.r.t selected_worker & unsch_tasks
valid_tasks: available tasks filtered from unsch_tasks
Args:
loc_dist_threshold: Distance threshold for two locations to be connected by an edge
"""
num_workers = num_robots + num_humans
# num_locs = map_width * map_width
    num_values = len(unsch_tasks)  # number of candidate actions (not used below)
num_nodes_dict = {'task': num_tasks + 2,
'worker': num_workers,
'state': 1}
# print(num_nodes_dict)
# Sort the nodes and assign an index to each one
task_name_to_idx = {node: idx for idx, node in enumerate(sorted(halfDG.nodes))}
task_edge_to_idx = {(from_node, to_node): idx for idx, (from_node, to_node) in enumerate(halfDG.edges)}
# Workers:
# List of (task id, worker id) tuples
task_to_worker_data = []
for wj in range(num_workers):
# add f0
task_to_worker_data.append((0, wj))
# add si (including s0)
for i in range(len(partials[wj])):
ti = partials[wj][i].item()
task_id = ti + 1
task_to_worker_data.append((task_id, wj))
unsch_task_to_worker = []
for wj in range(num_workers):
for t in unsch_tasks:
task_id = t + 1
unsch_task_to_worker.append((task_id, wj))
# print(unsch_task_to_worker)
worker_com_data = [(i, j) for i in range(num_workers) for j in range(num_workers)]
    # Edge connectivity for each canonical edge type
data_dict = {
('task', 'temporal', 'task'): (
# Convert named edges to indexes
[task_name_to_idx[from_node] for from_node, _ in halfDG.edges],
[task_name_to_idx[to_node] for _, to_node in halfDG.edges],
),
('task', 'assigned_to', 'worker'): (
[task for task, _ in task_to_worker_data],
[worker for _, worker in task_to_worker_data],
),
('task', 'take_time', 'worker'): (
[task for task, _ in unsch_task_to_worker],
[worker for _, worker in unsch_task_to_worker],
),
('worker', 'use_time', 'task'): (
[worker for _, worker in unsch_task_to_worker],
[task for task, _ in unsch_task_to_worker],
),
('worker', 'com', 'worker'): (
[i for i, _ in worker_com_data],
[j for _, j in worker_com_data],
),
        # Add graph summary ('state') node connections
# [task] — [in] — [state]
('task', 'tin', 'state'): (
list(range(num_tasks + 2)),
np.zeros(num_tasks + 2, dtype=np.int64),
),
# [worker] — [in] — [state]
('worker', 'win', 'state'): (
list(range(num_workers)),
np.zeros(num_workers, dtype=np.int64),
),
# [state] — [in] — [state] self-loop
('state', 'sin', 'state'): (
[0],
[0],
)
}
# print(data_dict, num_nodes_dict)
graph = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict, idtype=torch.int64)
    # Store edge data by index, as DiGraph.edges.data does not guarantee
    # the same ordering as DiGraph.edges
temporal_edge_weights = torch.zeros((len(halfDG.edges), 1), dtype=torch.float32)
# Unpack indexes of edge weights
weights_idx = [task_edge_to_idx[from_node, to_node] for from_node, to_node, _ in halfDG.edges.data('weight')]
# Put weights in tensor according to their indexes
temporal_edge_weights[weights_idx, :] = torch.tensor([[weight] for _, _, weight in halfDG.edges.data('weight')],
dtype=torch.float32)
graph.edges['temporal'].data['weight'] = temporal_edge_weights
takes_time_weight = torch.zeros((len(unsch_task_to_worker), 1), dtype=torch.float32)
for idx, (task, worker) in enumerate(unsch_task_to_worker):
# Subtract 2 because task 1's node id is 2, but has index 0 in dur
takes_time_weight[idx] = dur[task - 2, worker]
graph.edges['take_time'].data['t'] = takes_time_weight
    # Ordering of take_time and use_time edges is exactly the same
graph.edges['use_time'].data['t'] = takes_time_weight.detach().clone()
return graph
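
# Illustrative usage sketch for build_hetgraph (not part of the original
# file). It mirrors the __main__ block at the bottom of this module; outside
# that context, the env/problem objects and their attributes are assumptions.
#
#   g = build_hetgraph(env.halfDG, problem.num_tasks, problem.num_robots,
#                      problem.num_humans, env.dur, env.partials,
#                      env.get_unscheduled_tasks())
#   g.edges['temporal'].data['weight']   # STN edge weights, shape (|E|, 1)
#   g.edges['take_time'].data['t']       # durations of unscheduled tasks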
def hetgraph_node_helper(number_of_nodes, curr_partialw, curr_partials,
durations, num_workers, num_values):
"""
Generate initial node features for hetgraph
The input of hetgraph is a dictionary of node features for each type
Args:
number_of_nodes: number of nodes in half distance graph (halfDG)
curr_partialw: partial solution/whole
curr_partials: partial solution/seperate
locations: np array locations
durations: np array task durations
num_workers: number of workers
num_values: number of actions / Q values
Returns:
feat_dict: node features stored in a dict
"""
feat_dict = {}
num_locations = 0 # map_width * map_width
# Task features.
# For scheduled tasks, the feature is [1 0 dur 0 dur 0]
# For unscheduled ones, the feature is [0 1 min max-min mean std]
# print(number_of_nodes)
feat_dict['task'] = np.zeros((number_of_nodes, 6))
# print(durations)
max_dur, min_dur = durations.max(axis=1), durations.min(axis=1)
mean_dur, std_dur = durations.mean(axis=1), durations.std(axis=1)
# f0
feat_dict['task'][0, 0] = 1
# s0~si. s0 has index 1
for i in range(1, number_of_nodes):
ti = i-1
if ti in curr_partialw:
feat_dict['task'][i, 0] = 1
if ti > 0:
# Ignore s0
for j in range(num_workers):
if ti in curr_partials[j]:
rj = j
break
feat_dict['task'][i, [2, 4]] = durations[ti-1][rj]
else:
feat_dict['task'][i] = [0, 1, min_dur[ti-1], max_dur[ti-1] - min_dur[ti-1],
mean_dur[ti-1], std_dur[ti-1]]
# # [loc]
# feat_dict['loc'] = np.zeros((num_locations, 1))
# serialized_locs = [(locations[i, 1] - 1) * map_width + locations[i, 0] - 1 for i in range(locations.shape[0])]
# loc_counter = Counter(serialized_locs)
# for i in range(num_locations):
# # number of tasks in location
# feat_dict['loc'][i, 0] = loc_counter[i]
# [worker]
feat_dict['worker'] = np.zeros((num_workers, 1))
for i in range(num_workers):
# number of tasks assigned so far
# including s0
feat_dict['worker'][i, 0] = len(curr_partials[i])
# [state]
feat_dict['state'] = np.array((number_of_nodes-1, len(curr_partialw),
num_locations, num_workers)).reshape(1,4)
return feat_dict
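
# Sketch of how the returned features line up with build_hetgraph's node
# counts (illustrative; reuses the env/problem objects from the __main__
# block below):
#
#   feat = hetgraph_node_helper(env.halfDG.number_of_nodes(), env.partialw,
#                               env.partials, env.dur, len(env.team),
#                               len(env.get_unscheduled_tasks()))
#   feat['task'].shape    # (number_of_nodes, 6)
#   feat['worker'].shape  # (num_workers, 1)
#   feat['state'].shape   # (1, 4)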
'''
Transition for n-step learning

state
    curr_g: networkx graph updated with the current solution
    curr_partials: partial solution as a list of numpy arrays (int)
        [sd0 sd1 ...]
        sd0: partial schedule of robot 0
        sd1: partial schedule of robot 1
        ...
    curr_partialw: partial schedule of all tasks selected so far
    locs: the location of each task (field currently disabled)
    durs: the duration of each task
action
    act_task: ti
    act_robot: rj
    append ti to rj's partial schedule
reward
    reward_n: total future discounted reward
next state
    next_g: networkx graph
    next_partials, next_partialw: next partial solution
termination
    next_done: if True, the next state is a terminal state,
    i.e. the episode finishes, either
        1. with a feasible solution, or
        2. stopped with an infeasible partial schedule
'''
Transition = namedtuple('Transition',
('env_id', 'curr_g', 'curr_partials', 'curr_partialw',
# 'locs',
'durs',
'act_task', 'act_robot',
'reward_n', 'next_g', 'next_partials',
'next_partialw', 'next_done'))
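
# Example (illustrative only) of how a Transition is typically packed and
# later batched; the variable names here are placeholders:
#
#   tr = Transition(env_id, curr_g, curr_partials, curr_partialw, durs,
#                   act_task, act_robot, reward_n, next_g, next_partials,
#                   next_partialw, next_done)
#   batch = Transition(*zip(*samples))  # common DQN batching idiom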
'''
Replay buffer
'''
class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Save a transition, overwriting the oldest entry once at capacity."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Sample a random batch of transitions without replacement."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)
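
# Minimal usage sketch for ReplayMemory (illustrative; the push arguments
# follow the Transition field order defined above):
#
#   memory = ReplayMemory(capacity=1000)
#   memory.push(env_id, curr_g, curr_partials, curr_partialw, durs,
#               act_task, act_robot, reward_n, next_g, next_partials,
#               next_partialw, next_done)
#   if len(memory) >= batch_size:
#       transitions = memory.sample(batch_size)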
'''
Enumerate all possible insertions (rollout version) based on
    num_tasks: total number of tasks, 1~N
    curr_partialw: partial solution
Returns
    act_task: np array of all tasks that can still be inserted
'''
def action_helper_rollout(num_tasks, curr_partialw):
act_task = []
# pick a task t_i from {unallocated}
for i in range(1, num_tasks + 1):
if i not in curr_partialw:
act_task.append(i)
return np.array(act_task)
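
# Worked example (illustrative): with num_tasks = 4 and tasks 0 and 2 already
# in the partial solution, the remaining candidates are tasks 1, 3 and 4:
#
#   action_helper_rollout(4, np.array([0, 2]))  # -> array([1, 3, 4])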
if __name__ == '__main__':
# problem path
fname = 'env/tmp/test_file'
problem = MRCProblem(fname=fname)
team = HybridTeam(problem)
# initialize env
env = SchedulingEnv(problem, team)
    # env.graph is the original STN
print(env.graph.nodes())
print(env.graph.number_of_edges())
# env.halfDG is the simplified graph to be used for graph construction
print(sorted(env.halfDG.nodes()))
print(env.halfDG.number_of_edges())
# load solution
# problem.get_solution()
if problem.optimal_schedule is None:
problem.get_optimal_with_gurobi(fname+'_gurobi.log', threads=2)
optimals = problem.optimal_schedule[0]
optimalw = problem.optimal_schedule[1]
print(env.problem.optimal_schedule)
print('Initial makespan: ', env.min_makespan)
    # Replay the Gurobi solution in the environment to verify it
rs = []
for i in range(len(optimalw)):
for j in range(len(env.team)):
if optimalw[i] in optimals[j]:
rj = j
break
task_id = optimalw[i]
rt, reward, done, arg = env.step(task_id, rj)
rs.append(reward)
print('Insert %d, %d' % (optimalw[i], rj))
print('No. Edges: %d' % env.halfDG.number_of_edges())
print('Returns: ', rt, reward, done, env.min_makespan)
if not rt:
print('Infeasible!')
break
print(env.partialw)
print(sum(rs))
print('test passed')
    g = build_hetgraph(env.halfDG, problem.num_tasks, problem.num_robots,
                       problem.num_humans, env.dur, env.partials,
                       env.get_unscheduled_tasks())
print(g)