-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathplots.py
112 lines (90 loc) · 3.49 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.colors import colorConverter as cc
import numpy as np
from scipy import ndimage
Hyper_parameter_plot = False
if Hyper_parameter_plot:
#Help from: https://stackoverflow.com/questions/4700614/how-to-put-the-legend-out-of-the-plot
fig = plt.figure(figsize=(10,5))
ax = plt.subplot(111)
x = np.arange(0,102,2)
#Ordinary Importance Sampling
data = np.array(np.load('/content/Priority_DDPG_InvertedPendulum_0_OIS.npy'))
ax.plot(x,data,label='\u03B2 = 0')
#Weighted Importance Sampling, annealing from 0 to 1
data = np.array(np.load('/content/Priority_DDPG_InvertedPendulum_0_WIS_0.npy'))
ax.plot(x,data,label='\u03B2 = 0 \u2192 1 ')
#Weighted Importance Sampling, annealing from 0.6 to 1
data = np.array(np.load('/content/Priority_DDPG_InvertedPendulum_0_WIS_0_6.npy'))
ax.plot(x,data,label='\u03B2 = 0.6 \u2192 1 ')
box = ax.get_position()
ax.set_position([box.x0, box.y0 + box.height * 0.1,
box.width, box.height * 0.9])
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=3, fancybox=True, shadow=True)
#plt.legend()
plt.title('\u03B2 Annealing Schedule', fontsize=18)
plt.xlabel('time steps (1e3)',fontsize=14)
plt.ylabel('average return',fontsize=14)
plt.ylim(-50,1140)
plt.show()
fig = plt.figure()
x = np.arange(0,102,2)
models = ['','Priority_']
labels = ['uniform replay sampling','prioritized replay sampling']
for i in range(len(models)):
data = np.zeros([3,51])
for j in range(3):
filename = models[i]+'DDPG_InvertedPendulum_'+str(j)+'.npy'
#print(filename)
trial_data = np.array(np.load(filename))
data[j,:] = trial_data
mean = np.mean(data,axis=0)
std = np.std(data,axis=0)
mean = ndimage.uniform_filter(mean, size=3)
std = ndimage.uniform_filter(std, size=3)
plt.plot(x,mean,label=labels[i])
plt.fill_between(x,mean-std,mean+std,alpha=0.15)
plt.legend(loc='lower right')
plt.title('Inverted Pendulum')
plt.xlabel('time steps (1e3)',fontsize=14)
plt.ylabel('average return',fontsize=14)
plt.show()
fig = plt.figure()
x = np.arange(0,102,2)
models = ['','Priority_']
labels = ['uniform replay sampling','prioritized replay sampling']
for i in range(len(models)):
data = np.zeros([3,51])
for j in range(3):
filename = models[i]+'DDPG_Hopper_'+str(j)+'.npy'
#print(filename)
trial_data = np.array(np.load(filename))
data[j,:] = trial_data
mean = np.mean(data,axis=0)
std = np.std(data,axis=0)
mean = ndimage.uniform_filter(mean, size=3)
std = ndimage.uniform_filter(std, size=3)
plt.plot(x,mean,label=labels[i])
plt.fill_between(x,mean-std,mean+std,alpha=0.15)
plt.legend(loc='lower right')
plt.title('Hopper', fontsize=18)
plt.xlabel('time steps (1e3)',fontsize=14)
plt.ylabel('average return',fontsize=14)
plt.show()
envs = ['DDPG_InvertedPendulum','DDPG_Hopper']
models = ['','Priority_']
labels = ['uniform replay sampling','prioritized replay sampling']
for i in range(len(models)):
for env in envs:
data = np.zeros([3,51])
for j in range(3):
filename = models[i]+env+'_'+str(j)+'.npy'
#print(filename)
trial_data = np.array(np.load(filename))
data[j,:] = trial_data
max_data = np.max(data,axis=1)
print(models[i]+env)
print('mean: ',np.mean(max_data))
print('standard deviation: ',np.std(max_data))