@@ -17,8 +17,8 @@ def create_config(env_id, observation_shape_list, action_space_size_list, collec
17
17
action_space_size_list = action_space_size_list ,
18
18
from_pixels = False ,
19
19
# ===== only for debug =====
20
- # frame_skip=100 , # 100
21
- frame_skip = 2 ,
20
+ frame_skip = 50 , # 100
21
+ # frame_skip=2,
22
22
continuous = True , # Assuming all DMC tasks use continuous action spaces
23
23
collector_env_num = collector_env_num ,
24
24
evaluator_env_num = evaluator_env_num ,
@@ -38,6 +38,7 @@ def create_config(env_id, observation_shape_list, action_space_size_list, collec
38
38
calpha = 0.5 , rescale = 1 ,
39
39
),
40
40
use_moco = True , # ==============TODO==============
41
+ total_task_num = len (env_id_list ),
41
42
task_num = len (env_id_list ),
42
43
task_id = 0 , # To be set per task
43
44
model = dict (
@@ -54,6 +55,7 @@ def create_config(env_id, observation_shape_list, action_space_size_list, collec
54
55
# use_shared_projection=True, # TODO
55
56
use_shared_projection = False ,
56
57
# use_task_embed=True, # TODO
58
+ task_embed_option = None , # ==============TODO: none ==============
57
59
use_task_embed = False , # ==============TODO==============
58
60
num_unroll_steps = num_unroll_steps ,
59
61
policy_entropy_weight = 5e-2 ,
@@ -90,6 +92,7 @@ def create_config(env_id, observation_shape_list, action_space_size_list, collec
90
92
num_experts_of_moe_in_transformer = 4 ,
91
93
),
92
94
),
95
+ use_task_exploitation_weight = False , # TODO
93
96
# task_complexity_weight=True, # TODO
94
97
task_complexity_weight = False , # TODO
95
98
total_batch_size = total_batch_size ,
@@ -153,7 +156,7 @@ def generate_configs(env_id_list: List[str],
153
156
# TODO: debug
154
157
# exp_name_prefix = f'data_suz_mt_20250113/ddp_8gpu_nlayer8_upc200_taskweight-eval1e3-10k-temp10-1_task-embed_{len(env_id_list)}tasks_brf{buffer_reanalyze_freq}_tbs{total_batch_size}_seed{seed}/'
155
158
156
- exp_name_prefix = f'data_suz_mt_20250113/ddp_1gpu-moco_nlayer8_upc80_notaskweight-eval1e3-10k-temp10-1_no -task-embed_{ len (env_id_list )} tasks_brf{ buffer_reanalyze_freq } _tbs{ total_batch_size } _seed{ seed } /'
159
+ exp_name_prefix = f'data_suz_mt_20250207_debug/ddp_2gpu-moco_nlayer8_upc200_notaskweight_no -task-embed_{ len (env_id_list )} tasks_brf{ buffer_reanalyze_freq } _tbs{ total_batch_size } _seed{ seed } /'
157
160
158
161
# exp_name_prefix = f'data_suz_mt_20250113/ddp_3gpu_3games_nlayer8_upc200_notusp_notaskweight-symlog-01-05-eval1e3_{len(env_id_list)}tasks_brf{buffer_reanalyze_freq}_tbs{total_batch_size}_seed{seed}/'
159
162
@@ -205,7 +208,7 @@ def create_env_manager():
205
208
Overview:
206
209
This script should be executed with <nproc_per_node> GPUs.
207
210
Run the following command to launch the script:
208
- python -m torch.distributed.launch --nproc_per_node=8 --master_port=29500 ./zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_8games_config.py
211
+ python -m torch.distributed.launch --nproc_per_node=2 --master_port=29500 ./zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_8games_moco_config.py
209
212
torchrun --nproc_per_node=8 ./zoo/dmc2gym/config/dmc2gym_state_suz_multitask_ddp_config.py
210
213
"""
211
214
@@ -236,16 +239,16 @@ def create_env_manager():
236
239
# ]
237
240
238
241
# DMC 8games
239
- env_id_list = [
240
- 'acrobot-swingup' ,
241
- 'cartpole-balance' ,
242
- 'cartpole-balance_sparse' ,
243
- 'cartpole-swingup' ,
244
- 'cartpole-swingup_sparse' ,
245
- 'cheetah-run' ,
246
- "ball_in_cup-catch" ,
247
- "finger-spin" ,
248
- ]
242
+ # env_id_list = [
243
+ # 'acrobot-swingup',
244
+ # 'cartpole-balance',
245
+ # 'cartpole-balance_sparse',
246
+ # 'cartpole-swingup',
247
+ # 'cartpole-swingup_sparse',
248
+ # 'cheetah-run',
249
+ # "ball_in_cup-catch",
250
+ # "finger-spin",
251
+ # ]
249
252
250
253
# DMC 18games
251
254
# env_id_list = [
@@ -278,18 +281,18 @@ def create_env_manager():
278
281
n_episode = 8
279
282
evaluator_env_num = 3
280
283
num_simulations = 50
281
- # max_env_step = int(5e5)
282
- max_env_step = int (1e6 )
284
+ max_env_step = int (5e5 )
285
+ # max_env_step = int(1e6)
283
286
284
287
reanalyze_ratio = 0.0
285
288
286
- # nlayer=4
289
+ # nlayer=4/8
287
290
total_batch_size = 512
288
291
batch_size = [int (min (64 , total_batch_size / len (env_id_list ))) for _ in range (len (env_id_list ))]
289
292
290
- # nlayer=8/ 12
291
- total_batch_size = 256
292
- batch_size = [int (min (32 , total_batch_size / len (env_id_list ))) for _ in range (len (env_id_list ))]
293
+ # # nlayer=12
294
+ # total_batch_size = 256
295
+ # batch_size = [int(min(64 , total_batch_size / len(env_id_list))) for _ in range(len(env_id_list))]
293
296
294
297
num_unroll_steps = 5
295
298
infer_context_length = 2
@@ -299,12 +302,12 @@ def create_env_manager():
299
302
reanalyze_partition = 0.75
300
303
301
304
# ======== TODO: only for debug ========
302
- # collector_env_num = 2
303
- # num_segments = 2
304
- # n_episode = 2
305
- # evaluator_env_num = 2
306
- # num_simulations = 1
307
- # batch_size = [4 for _ in range(len(env_id_list))]
305
+ collector_env_num = 2
306
+ num_segments = 2
307
+ n_episode = 2
308
+ evaluator_env_num = 2
309
+ num_simulations = 1
310
+ batch_size = [4 for _ in range (len (env_id_list ))]
308
311
# =======================================
309
312
310
313
seed = 0 # You can iterate over multiple seeds if needed
0 commit comments