@@ -4,10 +4,10 @@ program quadratic_fit
  ! descent.

  use nf, only: dense, input, network
  use nf_dense_layer, only: dense_layer
-  use nf_optimizers, only: sgd, rmsprop, adam
+  use nf_optimizers, only: sgd, rmsprop, adam, adagrad

  implicit none
-  type(network) :: net(9)
+  type(network) :: net(11)

  ! Training parameters
  integer, parameter :: num_epochs = 1000
@@ -95,6 +95,17 @@ program quadratic_fit
    beta1, beta2, epsilon, weight_decay_decoupled=1e-5 &
  )

+  ! Adagrad optimizer
+  call adagrad_optimizer( &
+    net(10), x, y, xtest, ytest, learning_rate, num_epochs, epsilon &
+  )
+
+  ! Adagrad optimizer with L2 regularization and learning rate decay
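+  ! (weight_decay_l2 applies an L2 penalty to the weights; learning_rate_decay
+  ! reduces the step size as training progresses)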
+  call adagrad_optimizer( &
+    net(11), x, y, xtest, ytest, learning_rate, num_epochs, epsilon, &
+    weight_decay_l2=1e-4, learning_rate_decay=0.99 &
+  )
+
contains

  real elemental function quadratic(x) result(y)
@@ -358,6 +369,68 @@ subroutine adam_optimizer( &

  end subroutine adam_optimizer

+  subroutine adagrad_optimizer( &
+    net, x, y, xtest, ytest, learning_rate, num_epochs, epsilon, &
+    weight_decay_l2, learning_rate_decay &
+  )
+    ! Adagrad optimizer for updating weights using adaptive gradient algorithm
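+    ! The optional weight_decay_l2 and learning_rate_decay arguments enable L2
+    ! regularization and learning rate decay; both default to 0 (disabled).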
+    type(network), intent(inout) :: net
+    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: xtest(:), ytest(:)
+    real, intent(in) :: learning_rate, epsilon
+    real, intent(in), optional :: weight_decay_l2
+    real, intent(in), optional :: learning_rate_decay
+    integer, intent(in) :: num_epochs
+    integer :: i, n
+    real, allocatable :: ypred(:)
+    real :: weight_decay_l2_val
+    real :: learning_rate_decay_val
+
+    ! Set default value for weight_decay_l2
+    if (.not. present(weight_decay_l2)) then
+      weight_decay_l2_val = 0.0
+    else
+      weight_decay_l2_val = weight_decay_l2
+    end if
+
+    ! Set default value for learning_rate_decay
+    if (.not. present(learning_rate_decay)) then
+      learning_rate_decay_val = 0.0
+    else
+      learning_rate_decay_val = learning_rate_decay
+    end if
+
+    print '(a)', 'Adagrad optimizer'
+    print '(34("-"))'
+
+    do n = 1, num_epochs
+
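+      ! Accumulate gradients over the full training set, then apply a single
+      ! Adagrad update per epoch.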
+      do i = 1, size(x)
+        call net % forward([x(i)])
+        call net % backward([y(i)])
+      end do
+
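+      ! Adagrad scales each parameter's step by the inverse square root of its
+      ! accumulated squared gradients, so parameters with consistently large
+      ! gradients take smaller steps; see nf_optimizers for the exact update rule.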
+      call net % update( &
+        adagrad( &
+          learning_rate=learning_rate, &
+          epsilon=epsilon, &
+          weight_decay_l2=weight_decay_l2_val, &
+          learning_rate_decay=learning_rate_decay_val &
+        ) &
+      )
+
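+      ! Evaluate on the test set every num_epochs/10 epochs and report the error.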
+      if (mod(n, num_epochs / 10) == 0) then
+        ypred = [(net % predict([xtest(i)]), i = 1, size(xtest))]
+        print '("Epoch: ", i4, "/", i4, ", RMSE = ", f9.6)', &
+          n, num_epochs, sum((ypred - ytest)**2) / size(ytest)
+      end if
+
+    end do
+
+    print *, ''
+
+  end subroutine adagrad_optimizer
+
  subroutine shuffle(arr)
    ! Shuffle an array using the Fisher-Yates algorithm.
    integer, intent(inout) :: arr(:)