Commit 3824f1f ("updates")
1 parent: afa2f8d

30 files changed: 216 additions, 53 deletions

.gitignore (-2)

@@ -4,10 +4,8 @@
 # My ignores
 logs*
 output/
-*figure*
 sbatch*
 script.sh
-plot_paper.py
 *DS_Store*
 run*.sh
 

agents/ActorCritic.py (+1, -3; mode changed 100755 → 100644)

@@ -164,6 +164,4 @@ def learn(self):
     # Log
     if self.show_tb:
       self.logger.add_scalar(f'actor_loss', actor_loss.item(), self.step_count)
-      self.logger.add_scalar(f'critic_loss', critic_loss.item(), self.step_count)
-      self.logger.add_scalar(f'v', entries.v.mean().item(), self.step_count)
-      self.logger.add_scalar(f'log_pi', entries.log_pi.mean().item(), self.step_count)
+      self.logger.add_scalar(f'critic_loss', critic_loss.item(), self.step_count)

agents/BaseAgent.py (file mode changed 100755 → 100644)

agents/DDPG.py (file mode changed 100755 → 100644)

agents/DQN.py (file mode changed 100755 → 100644)

agents/PPO.py (+5, -10; mode changed 100755 → 100644)

@@ -75,13 +75,8 @@ def learn(self):
     self.optimizer['critic'].step()
     # Log
     if self.show_tb:
-      self.logger.add_scalar('actor_loss', actor_loss.item(), self.step_count)
-      self.logger.add_scalar('critic_loss', critic_loss.item(), self.step_count)
-      self.logger.add_scalar('log_pi', entries.log_pi.mean().item(), self.step_count)
-      action_std, entropy = self.network.get_entropy_pi(entries.state)
-      self.logger.add_scalar('entropy', entropy.mean().item(), self.step_count)
-      self.logger.add_scalar('action_std', action_std.mean().item(), self.step_count)
-      self.logger.add_scalar('KL', approx_kl.item(), self.step_count)
-      self.logger.add_scalar('IS', ratio.mean().item(), self.step_count)
-      self.logger.add_scalar('v', prediction['v'].mean().item(), self.step_count)
-      self.logger.add_scalar('adv', abs(entries.adv).mean().item(), self.step_count)
+      try:
+        self.logger.add_scalar('actor_loss', actor_loss.item(), self.step_count)
+      except:
+        pass
+      self.logger.add_scalar('critic_loss', critic_loss.item(), self.step_count)

agents/REINFORCE.py (file mode changed 100755 → 100644)

agents/RPG.py (file mode changed 100755 → 100644)

agents/RepOffPG.py (file mode changed 100755 → 100644)

agents/RepOnPG.py (file mode changed 100755 → 100644)

agents/SAC.py (file mode changed 100755 → 100644)

agents/TD3.py (file mode changed 100755 → 100644)

agents/VanillaDQN.py (file mode changed 100755 → 100644)

agents/__init__.py (file mode changed 100755 → 100644)

analysis_mujoco.py (+10, -22)

@@ -1,4 +1,4 @@
-import os
+=import os
 import math
 from utils.plotter import Plotter
 from utils.sweeper import unfinished_index, time_info, memory_info
@@ -39,7 +39,7 @@ def get_csv_result_dict(result, config_idx, mode='Train'):
     'ylim': {'min': None, 'max': None},
     'EMA': True,
     'loc': 'lower right',
-    'sweep_keys': ['gradient_clip', 'state_normalizer'],
+    'sweep_keys': [],
     'sort_by': ['Return (mean)', 'Return (se)'],
     'ascending': [False, True],
     'runs': 1
@@ -55,33 +55,21 @@ def analyze(exp, runs=1):
   plotter.csv_results('Test', get_csv_result_dict, get_process_result_dict)
   plotter.plot_results(mode='Train', indexes='all')
   plotter.plot_results(mode='Test', indexes='all')
-  # indexList = [11, 43, 15, 23, 31, 19, 22]
-  # plotter.plot_indexList(indexList, 'Train', exp)
 
   envs = ["HalfCheetah-v2", "Hopper-v2", "Walker2d-v2", "Swimmer-v2", "Ant-v2", "Reacher-v2"]
   indexes = {
-    'onrpg': [31, 32, 33, 34, 35, 36],
-    'ppo': [13, 14, 15, 16, 17, 18]
+    'ppo': [1, 2, 3, 4, 5, 6],
+    'rpg': [7, 8, 9, 10, 11, 12]
   }
-  if exp == 'rpg_onrpg':
+  if exp == 'rpg':
     for i in range(6):
       for mode in ['Train', 'Test']:
-        expIndexModeList = [['rpg_onrpg', indexes['onrpg'][i], mode], ['rpg_ppo', indexes['ppo'][i], mode]]
+        expIndexModeList = [['rpg', indexes['ppo'][i], mode], ['rpg', indexes['rpg'][i], mode]]
         plotter.plot_expIndexModeList(expIndexModeList, f'{mode}_{envs[i]}')


if __name__ == "__main__":
-  # unfinished_index('rpg_offrpg', runs=5)
-  # memory_info('rpg_offrpg', runs=5)
-  # time_info('rpg_offrpg', runs=5)
-  # analyze('rpg_offrpg', runs=5)
-
-  # unfinished_index('rpg_ppo', runs=10)
-  # memory_info('rpg_ppo', runs=10)
-  # time_info('rpg_ppo', runs=10)
-  # analyze('rpg_ppo', runs=10)
-
-  # unfinished_index('rpg_onrpg', runs=10)
-  # memory_info('rpg_onrpg', runs=10)
-  # time_info('rpg_onrpg', runs=10)
-  analyze('rpg_onrpg', runs=10)
+  unfinished_index('rpg', runs=30)
+  memory_info('rpg', runs=30)
+  time_info('rpg', runs=30)
+  analyze('rpg', runs=30)
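The new index lists (ppo: 1-6, rpg: 7-12) presumably map onto the 12 env x agent combinations produced by configs/rpg.json (6 MuJoCo tasks x {PPO, RPG}), giving one PPO/RPG pair of curves per task; this mapping is an inference from the config files below, not something stated in the commit.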

components/exploration.py (file mode changed 100755 → 100644)

components/network.py (file mode changed 100755 → 100644)

components/normalizer.py (file mode changed 100755 → 100644)

components/replay.py (file mode changed 100755 → 100644)

configs/rpg.json (new file, +36)

{
  "env": [
    {
      "name": ["HalfCheetah-v2", "Hopper-v2", "Walker2d-v2", "Swimmer-v2", "Ant-v2", "Reacher-v2"],
      "max_episode_steps": [-1],
      "input_type": ["feature"]
    }
  ],
  "train_steps": [3e6],
  "steps_per_epoch": [2048],
  "test_per_epochs": [4],
  "agent": [{"name": ["PPO", "RPG"]}],
  "optimizer": [
    {
      "name": ["Adam"],
      "actor_kwargs": [{"lr": [3e-4]}],
      "critic_kwargs": [{"lr": [1e-3]}],
      "reward_kwargs": [{"lr": [1e-3]}]
    }
  ],
  "batch_size": [64],
  "clip_ratio": [0.2],
  "target_kl": [0.01],
  "optimize_epochs": [10],
  "gradient_clip": [2],
  "hidden_layers": [[64,64]],
  "hidden_act": ["Tanh"],
  "display_interval": [20],
  "rolling_score_window": [{"Train": [20], "Test": [5]}],
  "discount": [0.99],
  "gae": [0.95],
  "seed": [1],
  "device": ["cpu"],
  "show_tb": [false],
  "generate_random_seed": [true]
}
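Every hyperparameter in these config files is given as a list of alternatives, which suggests the sweeper expands the file into a grid of concrete configurations and --config_idx selects one of them. The sketch below is a minimal, hypothetical illustration of such an expansion (a plain Cartesian product over list-valued entries); expand_grid is an invented name and not the actual utils/sweeper.py API.

import itertools
import json

def expand_grid(spec):
  # Recursively expand a dict whose values are lists of alternatives into
  # every combination (a Cartesian product over the keys).
  if isinstance(spec, dict):
    keys = list(spec.keys())
    choices = [expand_grid(spec[k]) for k in keys]
    return [dict(zip(keys, combo)) for combo in itertools.product(*choices)]
  if isinstance(spec, list):
    out = []
    for alt in spec:
      if isinstance(alt, dict):
        out.extend(expand_grid(alt))  # nested dicts are expanded recursively
      else:
        out.append(alt)  # scalars and literal lists (e.g. [64, 64]) are single alternatives
    return out
  return [spec]

if __name__ == '__main__':
  with open('./configs/rpg.json') as f:  # assumes running from the repo root
    grid = expand_grid(json.load(f))
  print(len(grid))  # 6 envs x 2 agents = 12 combinations for rpg.json
  print(grid[0]['agent']['name'], grid[0]['env']['name'])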

configs/rpg_gradclip.json (new file, +36)

{
  "env": [
    {
      "name": ["HalfCheetah-v2", "Hopper-v2", "Walker2d-v2", "Swimmer-v2", "Ant-v2", "Reacher-v2"],
      "max_episode_steps": [-1],
      "input_type": ["feature"]
    }
  ],
  "train_steps": [3e6],
  "steps_per_epoch": [2048],
  "test_per_epochs": [4],
  "agent": [{"name": ["PPO", "RPG"]}],
  "optimizer": [
    {
      "name": ["Adam"],
      "actor_kwargs": [{"lr": [3e-4]}],
      "critic_kwargs": [{"lr": [1e-3]}],
      "reward_kwargs": [{"lr": [1e-3]}]
    }
  ],
  "batch_size": [64],
  "clip_ratio": [0.2],
  "target_kl": [0.01],
  "optimize_epochs": [10],
  "gradient_clip": [-1, 0.5, 1, 4],
  "hidden_layers": [[64,64]],
  "hidden_act": ["Tanh"],
  "display_interval": [20],
  "rolling_score_window": [{"Train": [20], "Test": [5]}],
  "discount": [0.99],
  "gae": [0.95],
  "seed": [1],
  "device": ["cpu"],
  "show_tb": [false],
  "generate_random_seed": [true]
}
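This file is identical to configs/rpg.json except that gradient_clip is swept over [-1, 0.5, 1, 4] instead of fixed at 2 (a value of -1 presumably disables clipping), giving 6 envs x 2 agents x 4 clip values = 48 configurations.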

configs/rpg_reg.json (new file, +36)

{
  "env": [
    {
      "name": ["HalfCheetah-v2", "Hopper-v2", "Walker2d-v2", "Swimmer-v2", "Ant-v2", "Reacher-v2"],
      "max_episode_steps": [-1],
      "input_type": ["feature"]
    }
  ],
  "train_steps": [3e6],
  "steps_per_epoch": [2048],
  "test_per_epochs": [5],
  "agent": [{"name": ["PPO", "RPG"]}],
  "optimizer": [
    {
      "name": ["Adam"],
      "actor_kwargs": [{"lr": [3e-4]}],
      "critic_kwargs": [{"lr": [1e-3]}],
      "reward_kwargs": [{"lr": [1e-3], "weight_decay": [4e-6]}]
    }
  ],
  "batch_size": [64],
  "clip_ratio": [0.2],
  "target_kl": [0.01],
  "optimize_epochs": [10],
  "gradient_clip": [2],
  "hidden_layers": [[64,64]],
  "hidden_act": ["Tanh"],
  "display_interval": [40],
  "rolling_score_window": [{"Train": [30], "Test": [6]}],
  "discount": [0.99],
  "gae": [0.95],
  "seed": [1],
  "device": ["cpu"],
  "show_tb": [false],
  "generate_random_seed": [true]
}
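Relative to configs/rpg.json, the key change here is the weight_decay of 4e-6 on the reward-net optimizer (the value settled on in the 2021-01-23 diary entry below); test_per_epochs, display_interval, and rolling_score_window also differ slightly.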

diary.md (+81, -4)

@@ -12,7 +12,6 @@
 
 - Goal:
 - Analysis:
-- Next:
 
 ## 2020-12-15
 
@@ -369,7 +368,7 @@
 - Goal: benchmark OnRPG and PPO with state_normalizer and gradient clip.
 - Analysis:
   - OnRPG1 is consistently better than OnRPG; gradient_clip is very helpful; state_normalizer helps in most envs.
-  - OnRPG1 vs PPO: OnRPG1 wins 2 envs, lose 1, and 3 ties
+  - OnRPG1 vs PPO: OnRPG1 wins 2 envs, loses 1, and 3 ties
 - Next: normalize adv with global std, i.e. std(reward+adv).
 
 
@@ -385,8 +384,86 @@
 
 | experiment | runs | branch | commit |
 | ---------- | ---- | ------ | ------- |
-| rpg_onrpg2 | 10 | RPG | |
+| rpg_onrpg2 | 10 | RPG | 17ae0ca |
 
 - Goal: test a variant of OnRPG1 that uses two IS ratios, one for the reward part, one for the state value. We only set the IS ratio for the state value to 0.
 - Analysis: it is better than OnRPG1 on Walker2d, but worse on several other environments.
-- Next:
+- Next:
+
+
+## 2021-01-20
+
+**Change the name of OnRPG1 to RPG.**
+
+| experiment | runs | branch | commit |
+| ---------- | ---- | ------ | ------- |
+| rpg_lr | 10 | RPG | 3c8e907 |
+
+- Goal: sweep lr for the reward net in RPG
+- Analysis: no lr is consistently better than the original lr (1e-3)
+- Next: final benchmark of RPG and PPO
+
+
+## 2021-01-21
+
+| experiment | runs | branch | commit |
+| ---------- | ---- | ------ | ------- |
+| rpg1 | 20 | RPG | 3c8e907 |
+
+- Goal: benchmark RPG and PPO
+- Analysis:
+  - Train: 2 wins, 3 losses, 1 tie
+  - Test: 3 wins, 1 loss, 2 ties
+
+
+| experiment | runs | branch | commit |
+| ---------- | ---- | ------ | ------- |
+| rpg_reg1 | 10 | RPG | 3c8e907 |
+
+- Goal: use weight_decay to make the reward net smooth
+- Analysis: except Reacher, there is improvement in the other 5 envs.
+
+
+## 2021-01-23
+
+| experiment | runs | branch | commit |
+| ---------- | ---- | ------ | ------- |
+| rpg_reg2 | 10 | RPG | 3c8e907 |
+| rpg_reg3 | 10 | RPG | 3c8e907 |
+| rpg_reg4 | 10 | RPG | 3c8e907 |
+
+- Goal: sweep weight_decay with a smaller interval
+- Analysis: no dominant weight_decay, but 2e-6 to 6e-6 seem to be good choices; all in all, the best weight decay is 4e-6.
+
+
+## 2021-01-24
+
+| experiment | runs | branch | commit |
+| ---------- | ---- | ------ | ------- |
+| rpg_reg | 30 | RPG | |
+
+
+- Goal: benchmark RPG (with weight decay 4e-6) and PPO
+- Analysis: somehow PPO is better now with more runs while RPG is worse :(
+  - Train: 1 win, 2 losses, 3 ties
+  - Test: 2 wins, 2 losses, 2 ties
+
+
+| experiment | runs | branch | commit |
+| ---------- | ---- | ------ | ------- |
+| rpg | 30 | RPG | |
+
+- Goal: benchmark RPG (with no weight decay) and PPO on 6 tasks.
+- Analysis:
+  - Train: 1 win, 2 losses, 3 ties
+  - Test: 3 wins, 2 losses, 1 tie
+
+
+## 2021-01-27
+
+| experiment | runs | branch | commit |
+| ------------ | ------ | ------ | ------- |
+| rpg_gradclip | 10 | RPG | |
+
+- Goal: sweep grad_clip for PPO and RPG on Cedar.
+- Analysis: no dominant gradient clip.

envs/env.py (file mode changed 100755 → 100644)

envs/wrapper.py (file mode changed 100755 → 100644)

experiment.py (file mode changed 100755 → 100644)

main.py (+11, -12; mode changed 100755 → 100644)

@@ -10,6 +10,7 @@ def main(argv):
   parser = argparse.ArgumentParser(description="Config file")
   parser.add_argument('--config_file', type=str, default='./configs/catcher.json', help='Configuration file for the chosen model')
   parser.add_argument('--config_idx', type=int, default=1, help='Configuration index')
+  parser.add_argument('--slurm_dir', type=str, default='', help='slurm tempory directory')
   args = parser.parse_args()
 
   sweeper = Sweeper(args.config_file)
@@ -27,18 +28,16 @@ def main(argv):
 
   # Set experiment name and log paths
   cfg['exp'] = args.config_file.split('/')[-1].split('.')[0]
-  logs_dir = f"./logs/{cfg['exp']}/{cfg['config_idx']}/"
-  train_log_path = logs_dir + 'result_Train.feather'
-  test_log_path = logs_dir + 'result_Test.feather'
-  model_path = logs_dir + 'model.pt'
-  cfg_path = logs_dir + 'config.json'
-  cfg['logs_dir'] = logs_dir
-  cfg['train_log_path'] = train_log_path
-  cfg['test_log_path'] = test_log_path
-  cfg['model_path'] = model_path
-  cfg['cfg_path'] = cfg_path
-
-  make_dir(cfg['logs_dir'])
+  if len(args.slurm_dir) > 0:
+    cfg['logs_dir'] = f"{args.slurm_dir}/{cfg['exp']}/{cfg['config_idx']}/"
+    make_dir(cfg['logs_dir'])
+  else:
+    cfg['logs_dir'] = f"./logs/{cfg['exp']}/{cfg['config_idx']}/"
+    make_dir(f"./logs/{cfg['exp']}/{cfg['config_idx']}/")
+  cfg['train_log_path'] = cfg['logs_dir'] + 'result_Train.feather'
+  cfg['test_log_path'] = cfg['logs_dir'] + 'result_Test.feather'
+  cfg['model_path'] = cfg['logs_dir'] + 'model.pt'
+  cfg['cfg_path'] = cfg['logs_dir'] + 'config.json'
 
   exp = Experiment(cfg)
   exp.run()
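The new --slurm_dir flag lets cluster jobs redirect logs to node-local storage: when it is non-empty, logs_dir is rooted at that path (e.g. a SLURM job's temporary directory such as $SLURM_TMPDIR) instead of ./logs/, and the result, model, and config paths all follow it.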

utils/helper.py (file mode changed 100755 → 100644)

utils/logger.py (file mode changed 100755 → 100644)

utils/sweeper.py (file mode changed 100755 → 100644)
