@@ -7,28 +7,27 @@ program quadratic_fit
  use nf_optimizers, only: sgd

  implicit none
-  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd, net_rms_prop
+  type(network) :: net(6)

  ! Training parameters
  integer, parameter :: num_epochs = 1000
  integer, parameter :: train_size = 1000
-  integer, parameter :: test_size = 30
-  integer, parameter :: batch_size = 10
+  integer, parameter :: test_size = 100
+  integer, parameter :: batch_size = 100
  real, parameter :: learning_rate = 0.01
  real, parameter :: decay_rate = 0.9

  ! Input and output data
  real, allocatable :: x(:), y(:) ! training data
  real, allocatable :: xtest(:), ytest(:) ! testing data
-  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:), ypred_rms_prop(:)

  integer :: i, n

  print '("Fitting quadratic function")'
  print '(60("="))'

  allocate(xtest(test_size), ytest(test_size))
-  xtest = [((i - 1) * 2 / test_size, i = 1, test_size)]
+  xtest = [(real(i - 1) * 2 / test_size, i = 1, test_size)]
  ytest = quadratic(xtest)

  ! x and y as 1-D arrays
@@ -41,38 +40,30 @@ program quadratic_fit
  end do
  y = quadratic(x)

-  ! Instantiate a separate network for each optimization method.
-  net_sgd = network([input(1), dense(3), dense(1)])
-  net_batch_sgd = network([input(1), dense(3), dense(1)])
-  net_minibatch_sgd = network([input(1), dense(3), dense(1)])
-  net_rms_prop = network([input(1), dense(3), dense(1)])
+  ! Instantiate a network and copy an instance to the rest of the array
+  net(1) = network([input(1), dense(3), dense(1)])
+  net(2:) = net(1)
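+  ! Note: intrinsic assignment copies net(1) into the remaining elements, so
+  ! all six optimizer runs below start from the same initial weights.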

  ! Print network info to stdout; this will be the same for all three networks.
-  call net_sgd % print_info()
+  call net(1) % print_info()

-  ! SGD optimizer
-  call sgd_optimizer(net_sgd, x, y, learning_rate, num_epochs)
+  ! SGD, no momentum
+  call sgd_optimizer(net(1), x, y, xtest, ytest, learning_rate, num_epochs)
+
+  ! SGD, momentum
+  call sgd_optimizer(net(2), x, y, xtest, ytest, learning_rate, num_epochs, momentum=0.9)
+
+  ! SGD, momentum with Nesterov
+  call sgd_optimizer(net(3), x, y, xtest, ytest, learning_rate, num_epochs, momentum=0.9, nesterov=.true.)

  ! Batch SGD optimizer
-  call batch_gd_optimizer(net_batch_sgd, x, y, learning_rate, num_epochs)
+  call batch_gd_optimizer(net(4), x, y, xtest, ytest, learning_rate, num_epochs)

  ! Mini-batch SGD optimizer
-  call minibatch_gd_optimizer(net_minibatch_sgd, x, y, learning_rate, num_epochs, batch_size)
+  call minibatch_gd_optimizer(net(5), x, y, xtest, ytest, learning_rate, num_epochs, batch_size)

  ! RMSProp optimizer
-  call rmsprop_optimizer(net_rms_prop, x, y, learning_rate, num_epochs, decay_rate)
-
-  ! Calculate predictions on the test set
-  ypred_sgd = [(net_sgd % predict([xtest(i)]), i = 1, test_size)]
-  ypred_batch_sgd = [(net_batch_sgd % predict([xtest(i)]), i = 1, test_size)]
-  ypred_minibatch_sgd = [(net_minibatch_sgd % predict([xtest(i)]), i = 1, test_size)]
-  ypred_rms_prop = [(net_rms_prop % predict([xtest(i)]), i = 1, test_size)]
-
-  ! Print the mean squared error
-  print '("Stochastic gradient descent MSE:", f9.6)', sum((ypred_sgd - ytest)**2) / size(ytest)
-  print '(" Batch gradient descent MSE: ", f9.6)', sum((ypred_batch_sgd - ytest)**2) / size(ytest)
-  print '(" Minibatch gradient descent MSE: ", f9.6)', sum((ypred_minibatch_sgd - ytest)**2) / size(ytest)
-  print '(" RMSProp MSE: ", f9.6)', sum((ypred_rms_prop - ytest)**2) / size(ytest)
+  call rmsprop_optimizer(net(6), x, y, xtest, ytest, learning_rate, num_epochs, decay_rate)

contains

@@ -82,65 +73,107 @@ real elemental function quadratic(x) result(y)
    y = (x**2 / 2 + x / 2 + 1) / 2
  end function quadratic

-  subroutine sgd_optimizer(net, x, y, learning_rate, num_epochs)
+  subroutine sgd_optimizer(net, x, y, xtest, ytest, learning_rate, num_epochs, momentum, nesterov)
    ! In the stochastic gradient descent (SGD) optimizer, we run the forward
    ! and backward passes and update the weights for each training sample,
    ! one at a time.
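    ! Conceptually, plain SGD nudges the weights by w <- w - learning_rate * dL/dw
    ! after every sample; the optional momentum and Nesterov arguments below
    ! modify that basic update.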
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: xtest(:), ytest(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
+    real, intent(in), optional :: momentum
+    logical, intent(in), optional :: nesterov
+    real, allocatable :: ypred(:)
+    real :: momentum_value
+    logical :: nesterov_value
    integer :: i, n

-    print *, "Running SGD optimizer..."
+    print '(a)', 'Stochastic gradient descent'
+    print '(34("-"))'
+
+    ! Set default values for momentum and nesterov
+    if (.not. present(momentum)) then
+      momentum_value = 0.0
+    else
+      momentum_value = momentum
+    end if
+
+    if (.not. present(nesterov)) then
+      nesterov_value = .false.
+    else
+      nesterov_value = nesterov
+    end if
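+
+    ! With a non-zero momentum the optimizer keeps a running velocity of past
+    ! gradients (roughly v <- momentum*v - learning_rate*dL/dw, then w <- w + v);
+    ! nesterov = .true. selects the Nesterov variant, which conceptually takes
+    ! the gradient at the look-ahead point w + momentum*v.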

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
-        call net % update(sgd(learning_rate=learning_rate))
+        call net % update(sgd(learning_rate=learning_rate, momentum=momentum_value, nesterov=nesterov_value))
      end do
+
+      if (mod(n, num_epochs / 10) == 0) then
+        ypred = [(net % predict([xtest(i)]), i = 1, size(xtest))]
+        print '("Epoch: ", i4,"/",i4,", RMSE = ", f9.6)', n, num_epochs, sqrt(sum((ypred - ytest)**2) / size(ytest))
+      end if
+
    end do

+    print *, ''
+
  end subroutine sgd_optimizer

-  subroutine batch_gd_optimizer(net, x, y, learning_rate, num_epochs)
+  subroutine batch_gd_optimizer(net, x, y, xtest, ytest, learning_rate, num_epochs)
    ! Like the stochastic gradient descent (SGD) optimizer, except that here we
    ! accumulate the weight gradients for all training samples and update the
    ! weights once per epoch.
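    ! Scaling the learning rate by 1/size(x) in the update below turns the
    ! accumulated sum of per-sample gradients into an average, i.e. one
    ! full-batch gradient descent step per epoch.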
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: xtest(:), ytest(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs
+    real, allocatable :: ypred(:)
    integer :: i, n

-    print *, "Running batch GD optimizer..."
+    print '(a)', 'Batch gradient descent'
+    print '(34("-"))'

    do n = 1, num_epochs
      do i = 1, size(x)
        call net % forward([x(i)])
        call net % backward([y(i)])
      end do
      call net % update(sgd(learning_rate=learning_rate / size(x)))
+
+      if (mod(n, num_epochs / 10) == 0) then
+        ypred = [(net % predict([xtest(i)]), i = 1, size(xtest))]
+        print '("Epoch: ", i4,"/",i4,", RMSE = ", f9.6)', n, num_epochs, sqrt(sum((ypred - ytest)**2) / size(ytest))
+      end if
+
    end do

+    print *, ''
+
  end subroutine batch_gd_optimizer

-  subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_size)
+  subroutine minibatch_gd_optimizer(net, x, y, xtest, ytest, learning_rate, num_epochs, batch_size)
    ! Like the batch SGD optimizer, except that here we accumulate the weight
    ! gradients over a number of mini-batches and update the weights once per
    ! mini-batch.
    !
    ! Note: -O3 on GFortran must be accompanied with -fno-frontend-optimize for
    ! this subroutine to converge to a solution.
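    ! Every update below uses learning_rate / batch_size, so each step is taken
    ! with the average gradient over one mini-batch; batch_indices and the
    ! shuffle helper (defined later in this file) are used to randomize the batches.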
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: xtest(:), ytest(:)
    real, intent(in) :: learning_rate
    integer, intent(in) :: num_epochs, batch_size
    integer :: i, j, n, num_samples, num_batches, start_index, end_index
    real, allocatable :: batch_x(:), batch_y(:)
    integer, allocatable :: batch_indices(:)
+    real, allocatable :: ypred(:)

-    print *, "Running mini-batch GD optimizer..."
+    print '(a)', 'Minibatch gradient descent'
+    print '(34("-"))'

    num_samples = size(x)
    num_batches = num_samples / batch_size
@@ -167,17 +200,28 @@ subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_si

        call net % update(sgd(learning_rate=learning_rate / batch_size))
      end do
+
+      if (mod(n, num_epochs / 10) == 0) then
+        ypred = [(net % predict([xtest(i)]), i = 1, size(xtest))]
+        print '("Epoch: ", i4,"/",i4,", RMSE = ", f9.6)', n, num_epochs, sqrt(sum((ypred - ytest)**2) / size(ytest))
+      end if
+
    end do
+
+    print *, ''
+
  end subroutine minibatch_gd_optimizer

-  subroutine rmsprop_optimizer(net, x, y, learning_rate, num_epochs, decay_rate)
+  subroutine rmsprop_optimizer(net, x, y, xtest, ytest, learning_rate, num_epochs, decay_rate)
    ! RMSprop optimizer for updating weights using root mean square
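    ! Conceptually, RMSprop keeps an exponential moving average of squared
    ! gradients, rms <- decay_rate*rms + (1 - decay_rate)*(dL/dw)**2, and scales
    ! each update by 1 / sqrt(rms + epsilon), giving a per-weight step size.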
    type(network), intent(inout) :: net
    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: xtest(:), ytest(:)
    real, intent(in) :: learning_rate, decay_rate
    integer, intent(in) :: num_epochs
    integer :: i, j, n
    real, parameter :: epsilon = 1e-8 ! Small constant to avoid division by zero
+    real, allocatable :: ypred(:)

    ! Define a dedicated type to store the RMSprop gradients.
    ! This is needed because array sizes vary between layers and we need to
@@ -191,7 +235,8 @@ subroutine rmsprop_optimizer(net, x, y, learning_rate, num_epochs, decay_rate)

    type(rms_gradient_dense), allocatable :: rms(:)

-    print *, "Running RMSprop optimizer..."
+    print '(a)', 'RMSProp optimizer'
+    print '(34("-"))'

    ! Here we allocate the array of RMS gradient derived types.
    ! We need one for each dense layer, however we will allocate it to the
@@ -237,8 +282,15 @@ subroutine rmsprop_optimizer(net, x, y, learning_rate, num_epochs, decay_rate)
        end select
      end do

+      if (mod(n, num_epochs / 10) == 0) then
+        ypred = [(net % predict([xtest(i)]), i = 1, size(xtest))]
+        print '("Epoch: ", i4,"/",i4,", RMSE = ", f9.6)', n, num_epochs, sqrt(sum((ypred - ytest)**2) / size(ytest))
+      end if
+
    end do

+    print *, ''
+
  end subroutine rmsprop_optimizer

  subroutine shuffle(arr)