From fcb4d8600a27b7558782e56c383b48e35646a35e Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Sun, 23 Feb 2025 20:48:33 +0400 Subject: [PATCH 1/8] fc2d_layer: initial forward implementation --- src/nf/nf_fc2d_layer.f90 | 95 ++++++++++++++++++++++++++++++++++++++++ test/test_fc2d_layer.f90 | 67 ++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 src/nf/nf_fc2d_layer.f90 create mode 100644 test/test_fc2d_layer.f90 diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90 new file mode 100644 index 00000000..ab906988 --- /dev/null +++ b/src/nf/nf_fc2d_layer.f90 @@ -0,0 +1,95 @@ +module nf_fc2d_layer + use iso_fortran_env, only: stderr => error_unit + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + use nf_linear2d_layer, only: linear2d_layer + + implicit none + + private + public :: fc2d_layer + + type, extends(base_layer) :: fc2d_layer + integer :: sequence_length, hidden_size, model_dimension + + type(linear2d_layer) :: in_proj + type(linear2d_layer) :: out_proj + + class(activation_function), allocatable :: activation + + real, allocatable :: gradient(:, :) + real, allocatable :: in_proj_input(:, :) + real, allocatable :: out_proj_input(:, :) + + real, allocatable :: output(:, :) + + contains +! procedure :: backward + procedure :: forward +! procedure :: get_num_params +! procedure :: get_params +! procedure :: get_gradients +! procedure :: set_params + procedure :: init + end type fc2d_layer + + interface fc2d_layer + module function fc2d_layer_cons(hidden_size, activation) result(res) + !! This function returns the `fc2d_layer` instance. + integer, intent(in) :: hidden_size + class(activation_function), intent(in) :: activation + type(fc2d_layer) :: res + end function fc2d_layer_cons + end interface fc2d_layer + +contains + module function fc2d_layer_cons(hidden_size, activation) result(res) + !! This function returns the `fc2d_layer` instance. 
+ integer, intent(in) :: hidden_size + class(activation_function), intent(in) :: activation + type(fc2d_layer) :: res + + res % hidden_size = hidden_size + res % activation_name = activation % get_name() + allocate(res % activation, source = activation) + end function fc2d_layer_cons + + module subroutine init(self, input_shape) + class(fc2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + if (size(input_shape) /= 2) then + error stop "fc2d_layer accepts 2D input" + end if + + self % sequence_length = input_shape(1) + self % model_dimension = input_shape(2) + + self % in_proj = linear2d_layer(self % hidden_size) + call self % in_proj % init([self % sequence_length, self % model_dimension]) + + self % out_proj = linear2d_layer(self % model_dimension) + call self % out_proj % init([self % sequence_length, self % hidden_size]) + + allocate(self % in_proj_input(self % sequence_length, self % model_dimension)) + allocate(self % out_proj_input(self % sequence_length, self % hidden_size)) + + allocate(self % output(self % sequence_length, self % model_dimension)) + end subroutine init + + pure module subroutine forward(self, input) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + integer :: i + + self % in_proj_input = input + call self % in_proj % forward(input) + + do concurrent(i = 1: self % sequence_length) + self % out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :)) + end do + + call self % out_proj % forward(self % out_proj_input) + self % output = self % out_proj % output + end subroutine forward +end module nf_fc2d_layer diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90 new file mode 100644 index 00000000..9a93eaad --- /dev/null +++ b/test/test_fc2d_layer.f90 @@ -0,0 +1,67 @@ +program test_fc2d_layer + use iso_fortran_env, only: stderr => error_unit + use nf_fc2d_layer, only: fc2d_layer + use nf, only: relu + implicit none + + logical :: ok = .true. 
+ real :: sample_input(3, 4) = reshape(& + [0.0, -10.1, 0.2, 10.3, 0.4, 10.5, -0.6, 10.7, 10.8, 0.9, 0.11, 0.12],& + [3, 4]) + real :: sample_gradient(3, 4) = reshape([0.1, 3., 2., 0.1, 3., 3., 0.1, 2., 0.1, 3., 0.1, 3.], [3, 4]) + type(fc2d_layer) :: fc + + fc = fc2d_layer(hidden_size=5, activation=relu()) + call fc % init([3, 4]) + fc % in_proj % weights = 0.1 + fc % in_proj % biases = 0.11 + fc % out_proj % weights = 0.1 + fc % out_proj % biases = 0.11 + + call test_fc2d_layer_forward(fc, ok, sample_input) + + if (ok) then + print '(a)', 'test_fc2d_layer: All tests passed.' + else + write(stderr, '(a)') 'test_fc2d_layer: One or more tests failed.' + stop 1 + end if + +contains + function allclose(x, y) result(res) + real, intent(in) :: x(:) + real, intent(in) :: y(:) + logical :: res + + res = all(abs(x - y) <= (1e-06 + 1e-05 * abs(y))) + end function allclose + + subroutine test_fc2d_layer_forward(fc, ok, input) + type(fc2d_layer), intent(in out) :: fc + logical, intent(in out) :: ok + real, intent(in) :: input(3, 4) + real :: output_shape(2) + real :: output_flat(12) + real :: expected_shape(2) = [3, 4] + real :: expected_output_flat(12) = [& + 0.695, 0.2205, 1.246,& + 0.695, 0.2205, 1.246,& + 0.695, 0.2205, 1.246,& + 0.695, 0.2205, 1.246& + ] + + call fc % forward(input) + + output_shape = shape(fc % output) + if (.not. all(output_shape.eq.expected_shape)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect shape.. failed' + end if + output_flat = reshape(fc % output, shape(output_flat)) + if (.not. allclose(output_flat, expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. 
failed' + end if + end subroutine test_fc2d_layer_forward + +end program test_fc2d_layer \ No newline at end of file From 3ff960889ddb823cfcfd9bc317a2cd1efec1cf0d Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 24 Feb 2025 20:23:40 +0400 Subject: [PATCH 2/8] fc2d_layer: backward pass implementation --- src/nf/nf_fc2d_layer.f90 | 25 ++++++++- test/test_fc2d_layer.f90 | 111 +++++++++++++++++++++++++++++++++------ 2 files changed, 119 insertions(+), 17 deletions(-) diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90 index ab906988..77221ff3 100644 --- a/src/nf/nf_fc2d_layer.f90 +++ b/src/nf/nf_fc2d_layer.f90 @@ -24,7 +24,7 @@ module nf_fc2d_layer real, allocatable :: output(:, :) contains -! procedure :: backward + procedure :: backward procedure :: forward ! procedure :: get_num_params ! procedure :: get_params @@ -51,6 +51,11 @@ module function fc2d_layer_cons(hidden_size, activation) result(res) res % hidden_size = hidden_size res % activation_name = activation % get_name() + ! 
FIXME: implement correct derivative for `softmax` + if (res % activation_name == 'softmax') then + write(stderr, '(a)') '`softmax` activation is temporarily unavailable' + error stop 1 + end if allocate(res % activation, source = activation) end function fc2d_layer_cons @@ -75,6 +80,8 @@ module subroutine init(self, input_shape) allocate(self % out_proj_input(self % sequence_length, self % hidden_size)) allocate(self % output(self % sequence_length, self % model_dimension)) + + allocate(self % gradient, mold=self % in_proj % gradient) end subroutine init pure module subroutine forward(self, input) @@ -92,4 +99,20 @@ pure module subroutine forward(self, input) call self % out_proj % forward(self % out_proj_input) self % output = self % out_proj % output end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) + integer :: i + + call self % out_proj % backward(self % out_proj_input, gradient) + do concurrent(i = 1: self % sequence_length) + self % out_proj % gradient(i, :) = self % out_proj % gradient(i, :) & + * (self % activation % eval_1d_prime(self % in_proj % output(i, :))) + end do + call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient) + + self % gradient = self % in_proj % gradient + end subroutine backward end module nf_fc2d_layer diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90 index 9a93eaad..37713dc1 100644 --- a/test/test_fc2d_layer.f90 +++ b/test/test_fc2d_layer.f90 @@ -1,7 +1,7 @@ program test_fc2d_layer use iso_fortran_env, only: stderr => error_unit use nf_fc2d_layer, only: fc2d_layer - use nf, only: relu + use nf, only: activation_function, relu, tanhf, sigmoid, softplus implicit none logical :: ok = .true. 
@@ -11,20 +11,53 @@ program test_fc2d_layer real :: sample_gradient(3, 4) = reshape([0.1, 3., 2., 0.1, 3., 3., 0.1, 2., 0.1, 3., 0.1, 3.], [3, 4]) type(fc2d_layer) :: fc - fc = fc2d_layer(hidden_size=5, activation=relu()) - call fc % init([3, 4]) - fc % in_proj % weights = 0.1 - fc % in_proj % biases = 0.11 - fc % out_proj % weights = 0.1 - fc % out_proj % biases = 0.11 - - call test_fc2d_layer_forward(fc, ok, sample_input) + call test_fc2d_layer_forward(ok, sample_input) + call test_fc2d_layer_backward(& + ok, sample_input, sample_gradient,& + activation=relu(),& + expected_gradient_flat=[& + 0.198, 0.486, 0.486,& + 0.396, 0.972, 0.972,& + 0.594, 1.458, 1.458,& + 0.792, 1.944, 1.944& + ]& + ) + call test_fc2d_layer_backward(& + ok, sample_input, sample_gradient,& + activation=sigmoid(),& + expected_gradient_flat=[& + 0.01068044, 0.02734236, 0.00086295,& + 0.02136087, 0.05140798, 0.00172666,& + 0.03357822, 0.07555774, 0.00266102,& + 0.04567052, 0.10338347, 0.0038053& + ]& + ) + call test_fc2d_layer_backward(& + ok, sample_input, sample_gradient,& + activation=tanhf(),& + expected_gradient_flat=[& + 3.7096841e-03, 9.3461145e-03, 1.1113838e-05,& + 7.4193683e-03, 1.6985621e-02, 2.2227676e-05,& + 1.2096796e-02, 2.4647098e-02, 3.3862932e-05,& + 1.6650427e-02, 3.4423053e-02, 5.0007438e-05& + ]& + ) + call test_fc2d_layer_backward(& + ok, sample_input, sample_gradient,& + activation=softplus(),& + expected_gradient_flat=[& + 0.18651924, 0.45662752, 0.48513436,& + 0.37303847, 0.9168981, 0.9702679,& + 0.5578177, 1.3770795, 1.4553307,& + 0.7427467, 1.8331366, 1.9401824& + ]& + ) if (ok) then print '(a)', 'test_fc2d_layer: All tests passed.' else write(stderr, '(a)') 'test_fc2d_layer: One or more tests failed.' 
- stop 1 + error stop 1 end if contains @@ -36,20 +69,35 @@ function allclose(x, y) result(res) res = all(abs(x - y) <= (1e-06 + 1e-05 * abs(y))) end function allclose - subroutine test_fc2d_layer_forward(fc, ok, input) - type(fc2d_layer), intent(in out) :: fc + subroutine init_weigths(fc) + type(fc2d_layer) :: fc + fc % in_proj % weights = reshape(& + [0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.5, 0.1, 0.2, 0.4, 0.5, 0.1, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4, 0.5],& + [4, 5]& + ) + fc % in_proj % biases = 0.11 + fc % out_proj % weights = 0.1 + fc % out_proj % biases = 0.11 + end subroutine init_weigths + + subroutine test_fc2d_layer_forward(ok, input) logical, intent(in out) :: ok real, intent(in) :: input(3, 4) + type(fc2d_layer) :: fc real :: output_shape(2) real :: output_flat(12) real :: expected_shape(2) = [3, 4] real :: expected_output_flat(12) = [& - 0.695, 0.2205, 1.246,& - 0.695, 0.2205, 1.246,& - 0.695, 0.2205, 1.246,& - 0.695, 0.2205, 1.246& + 1.509, 1.5594, 3.4098,& + 1.509, 1.5594, 3.4098,& + 1.509, 1.5594, 3.4098,& + 1.509, 1.5594, 3.4098& ] + fc = fc2d_layer(hidden_size=5, activation=relu()) + call fc % init([3, 4]) + call init_weigths(fc) + call fc % forward(input) output_shape = shape(fc % output) @@ -64,4 +112,35 @@ subroutine test_fc2d_layer_forward(fc, ok, input) end if end subroutine test_fc2d_layer_forward + subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gradient_flat) + logical, intent(in out) :: ok + real, intent(in) :: input(3, 4) + real, intent(in) :: gradient(3, 4) + class(activation_function), intent(in) :: activation + real, intent(in) :: expected_gradient_flat(12) + + type(fc2d_layer) :: fc + + integer :: gradient_shape(2) + integer :: expected_gradient_shape(2) = [3, 4] + real :: gradient_flat(12) + + fc = fc2d_layer(hidden_size=5, activation=activation) + call fc % init([3, 4]) + call init_weigths(fc) + + call fc % forward(input) + call fc % backward(input, gradient) + + gradient_shape = shape(fc % gradient) + if 
(.not. all(gradient_shape.eq.expected_gradient_shape)) then + ok = .false. + write(stderr, '(a) (a)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name() + end if + gradient_flat = reshape(fc % gradient, shape(gradient_flat)) + if (.not. allclose(gradient_flat, expected_gradient_flat)) then + ok = .false. + write(stderr, '(aa)') 'backward returned incorrect gradient values.. failed for ', fc % activation % get_name() + end if + end subroutine test_fc2d_layer_backward end program test_fc2d_layer \ No newline at end of file From 07fb4d7bd58e5405043d9b12dc57af6aab68eecf Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 24 Feb 2025 22:03:20 +0400 Subject: [PATCH 3/8] fc2d_layer: parameters --- src/nf/nf_fc2d_layer.f90 | 64 +++++++++++++++++++++++++++++++++++++--- test/test_fc2d_layer.f90 | 50 +++++++++++++++++++++++++++++-- 2 files changed, 107 insertions(+), 7 deletions(-) diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90 index 77221ff3..fc7e9e34 100644 --- a/src/nf/nf_fc2d_layer.f90 +++ b/src/nf/nf_fc2d_layer.f90 @@ -26,10 +26,10 @@ module nf_fc2d_layer contains procedure :: backward procedure :: forward -! procedure :: get_num_params -! procedure :: get_params -! procedure :: get_gradients -! 
procedure :: set_params + procedure :: get_num_params + procedure :: get_params + procedure :: get_gradients + procedure :: set_params procedure :: init end type fc2d_layer @@ -115,4 +115,60 @@ pure module subroutine backward(self, input, gradient) self % gradient = self % in_proj % gradient end subroutine backward + + elemental module function get_num_params(self) result(num_params) + class(fc2d_layer), intent(in) :: self + integer :: num_params + + num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params() + end function get_num_params + + module function get_params(self) result(params) + class(fc2d_layer), intent(in) :: self + real, allocatable :: params(:) + + params = [& + self % in_proj % weights,& + self % out_proj % weights,& + self % in_proj % biases,& + self % out_proj % biases& + ] + end function get_params + + module function get_gradients(self) result(gradients) + class(fc2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + gradients = [ & + self % in_proj % dw,& + self % out_proj % dw,& + self % in_proj % db,& + self % out_proj % db& + ] + end function get_gradients + + module subroutine set_params(self, params) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: params(:) + integer :: i, j, window + + ! check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + ! FIXME: looks clumsy, better ideas? 
+ associate (transformation => self % model_dimension * self % hidden_size) + self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights)) + self % out_proj % weights = reshape(& + params(transformation + 1: 2 * transformation),& + shape(self % out_proj % weights)& + ) + self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size) + self % out_proj % biases = params(& + 2 * transformation + self % hidden_size + 1: & + 2 * transformation + self % hidden_size + self % model_dimension& + ) + end associate + end subroutine set_params end module nf_fc2d_layer diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90 index 37713dc1..62e709cc 100644 --- a/test/test_fc2d_layer.f90 +++ b/test/test_fc2d_layer.f90 @@ -1,7 +1,7 @@ program test_fc2d_layer use iso_fortran_env, only: stderr => error_unit use nf_fc2d_layer, only: fc2d_layer - use nf, only: activation_function, relu, tanhf, sigmoid, softplus + use nf, only: activation_function, relu, tanhf, sigmoid, softplus, sgd implicit none logical :: ok = .true. @@ -52,6 +52,7 @@ program test_fc2d_layer 0.7427467, 1.8331366, 1.9401824& ]& ) + call test_fc2d_layer_update_gradients(ok, sample_input, sample_gradient) if (ok) then print '(a)', 'test_fc2d_layer: All tests passed.' @@ -135,7 +136,7 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr gradient_shape = shape(fc % gradient) if (.not. all(gradient_shape.eq.expected_gradient_shape)) then ok = .false. - write(stderr, '(a) (a)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name() + write(stderr, '(aa)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name() end if gradient_flat = reshape(fc % gradient, shape(gradient_flat)) if (.not. 
allclose(gradient_flat, expected_gradient_flat)) then @@ -143,4 +144,47 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr write(stderr, '(aa)') 'backward returned incorrect gradient values.. failed for ', fc % activation % get_name() end if end subroutine test_fc2d_layer_backward -end program test_fc2d_layer \ No newline at end of file + + subroutine test_fc2d_layer_update_gradients(ok, input, gradient) + logical, intent(in out) :: ok + real, intent(in) :: input(3, 4) + real, intent(in) :: gradient(3, 4) + + type(fc2d_layer) :: fc + type(sgd) :: optim + + real :: parameters(49) + real :: updated_output(12) + real :: expected_updated_output(12) = [& + -1.1192487, -0.51458186, -2.2737966,& + -1.7527609, -0.8190526, -3.5071785,& + 0.36815026, 0.2097921, 0.6197472,& + -1.7491575, -0.79099315, -3.4819508& + ] + + fc = fc2d_layer(hidden_size=5, activation=softplus()) + call fc % init([3, 4]) + call init_weigths(fc) + + call fc % forward(input) + call fc % backward(input, gradient) + + if (fc % get_num_params() /= 49) then + ok = .false. + write(stderr, '(a)') 'incorrect number of parameters.. failed' + end if + + optim = SGD(learning_rate=0.01) + parameters = fc % get_params() + call optim % minimize(parameters, fc % get_gradients()) + call fc % set_params(parameters) + + call fc % forward(input) + + updated_output = reshape(fc % output, [12]) + if (.not. allclose(updated_output, expected_updated_output)) then + ok = .false. + write(stderr, '(a)') 'incorrect output after parameters update.. 
failed' + end if + end subroutine test_fc2d_layer_update_gradients +end program test_fc2d_layer From 7e7454bfe4036f5ac4e6ab0328a1b6594d82451b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 24 Feb 2025 22:17:06 +0400 Subject: [PATCH 4/8] fc2d_layer: submodule, comments, style fixes --- src/nf/nf_fc2d_layer.f90 | 168 +++++++---------------------- src/nf/nf_fc2d_layer_submodule.f90 | 138 ++++++++++++++++++++++++ test/test_fc2d_layer.f90 | 9 +- 3 files changed, 184 insertions(+), 131 deletions(-) create mode 100644 src/nf/nf_fc2d_layer_submodule.f90 diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90 index fc7e9e34..78288457 100644 --- a/src/nf/nf_fc2d_layer.f90 +++ b/src/nf/nf_fc2d_layer.f90 @@ -10,6 +10,8 @@ module nf_fc2d_layer public :: fc2d_layer type, extends(base_layer) :: fc2d_layer + !! Fully Connected 2D Layer + !! Two Linear layers with an activation function in between integer :: sequence_length, hidden_size, model_dimension type(linear2d_layer) :: in_proj @@ -42,133 +44,41 @@ module function fc2d_layer_cons(hidden_size, activation) result(res) end function fc2d_layer_cons end interface fc2d_layer -contains - module function fc2d_layer_cons(hidden_size, activation) result(res) - !! This function returns the `fc2d_layer` instance. - integer, intent(in) :: hidden_size - class(activation_function), intent(in) :: activation - type(fc2d_layer) :: res - - res % hidden_size = hidden_size - res % activation_name = activation % get_name() - ! 
FIXME: implement correct derivative for `softmax` - if (res % activation_name == 'softmax') then - write(stderr, '(a)') '`softmax` activation is temporarily unavailable' - error stop 1 - end if - allocate(res % activation, source = activation) - end function fc2d_layer_cons - - module subroutine init(self, input_shape) - class(fc2d_layer), intent(in out) :: self - integer, intent(in) :: input_shape(:) - - if (size(input_shape) /= 2) then - error stop "fc2d_layer accepts 2D input" - end if - - self % sequence_length = input_shape(1) - self % model_dimension = input_shape(2) - - self % in_proj = linear2d_layer(self % hidden_size) - call self % in_proj % init([self % sequence_length, self % model_dimension]) - - self % out_proj = linear2d_layer(self % model_dimension) - call self % out_proj % init([self % sequence_length, self % hidden_size]) - - allocate(self % in_proj_input(self % sequence_length, self % model_dimension)) - allocate(self % out_proj_input(self % sequence_length, self % hidden_size)) - - allocate(self % output(self % sequence_length, self % model_dimension)) - - allocate(self % gradient, mold=self % in_proj % gradient) - end subroutine init - - pure module subroutine forward(self, input) - class(fc2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) - integer :: i - - self % in_proj_input = input - call self % in_proj % forward(input) - - do concurrent(i = 1: self % sequence_length) - self % out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :)) - end do - - call self % out_proj % forward(self % out_proj_input) - self % output = self % out_proj % output - end subroutine forward - - pure module subroutine backward(self, input, gradient) - class(fc2d_layer), intent(in out) :: self - real, intent(in) :: input(:, :) - real, intent(in) :: gradient(:, :) - integer :: i - - call self % out_proj % backward(self % out_proj_input, gradient) - do concurrent(i = 1: self % sequence_length) - self % out_proj % gradient(i, 
:) = self % out_proj % gradient(i, :) & - * (self % activation % eval_1d_prime(self % in_proj % output(i, :))) - end do - call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient) - - self % gradient = self % in_proj % gradient - end subroutine backward - - elemental module function get_num_params(self) result(num_params) - class(fc2d_layer), intent(in) :: self - integer :: num_params - - num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params() - end function get_num_params - - module function get_params(self) result(params) - class(fc2d_layer), intent(in) :: self - real, allocatable :: params(:) - - params = [& - self % in_proj % weights,& - self % out_proj % weights,& - self % in_proj % biases,& - self % out_proj % biases& - ] - end function get_params - - module function get_gradients(self) result(gradients) - class(fc2d_layer), intent(in), target :: self - real, allocatable :: gradients(:) - - gradients = [ & - self % in_proj % dw,& - self % out_proj % dw,& - self % in_proj % db,& - self % out_proj % db& - ] - end function get_gradients - - module subroutine set_params(self, params) - class(fc2d_layer), intent(in out) :: self - real, intent(in) :: params(:) - integer :: i, j, window - - ! check if the number of parameters is correct - if (size(params) /= self % get_num_params()) then - error stop 'Error: number of parameters does not match' - end if - - ! FIXME: looks clumsy, better ideas? 
- associate (transformation => self % model_dimension * self % hidden_size) - self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights)) - self % out_proj % weights = reshape(& - params(transformation + 1: 2 * transformation),& - shape(self % out_proj % weights)& - ) - self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size) - self % out_proj % biases = params(& - 2 * transformation + self % hidden_size + 1: & - 2 * transformation + self % hidden_size + self % model_dimension& - ) - end associate - end subroutine set_params + interface + module subroutine init(self, input_shape) + class(fc2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + end subroutine init + + pure module subroutine forward(self, input) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) + end subroutine backward + + elemental module function get_num_params(self) result(num_params) + class(fc2d_layer), intent(in) :: self + integer :: num_params + end function get_num_params + + module function get_params(self) result(params) + class(fc2d_layer), intent(in) :: self + real, allocatable :: params(:) + end function get_params + + module function get_gradients(self) result(gradients) + class(fc2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + end function get_gradients + + module subroutine set_params(self, params) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: params(:) + end subroutine set_params + end interface end module nf_fc2d_layer diff --git a/src/nf/nf_fc2d_layer_submodule.f90 b/src/nf/nf_fc2d_layer_submodule.f90 new file mode 100644 index 00000000..60aa9c7a --- /dev/null +++ b/src/nf/nf_fc2d_layer_submodule.f90 @@ -0,0 +1,138 
@@ +submodule(nf_fc2d_layer) nf_fc2d_layer_submodule + use nf_activation, only: activation_function + use nf_base_layer, only: base_layer + use nf_linear2d_layer, only: linear2d_layer + + implicit none + +contains + module function fc2d_layer_cons(hidden_size, activation) result(res) + !! This function returns the `fc2d_layer` instance. + integer, intent(in) :: hidden_size + class(activation_function), intent(in) :: activation + type(fc2d_layer) :: res + + res % hidden_size = hidden_size + res % activation_name = activation % get_name() + ! FIXME: implement correct derivative for `softmax` + if (res % activation_name == 'softmax') then + write(stderr, '(a)') '`softmax` activation is temporarily unavailable' + error stop 1 + end if + allocate(res % activation, source = activation) + end function fc2d_layer_cons + + module subroutine init(self, input_shape) + class(fc2d_layer), intent(in out) :: self + integer, intent(in) :: input_shape(:) + + if (size(input_shape) /= 2) then + error stop "fc2d_layer accepts 2D input" + end if + + self % sequence_length = input_shape(1) + self % model_dimension = input_shape(2) + + self % in_proj = linear2d_layer(self % hidden_size) + call self % in_proj % init([self % sequence_length, self % model_dimension]) + + self % out_proj = linear2d_layer(self % model_dimension) + call self % out_proj % init([self % sequence_length, self % hidden_size]) + + allocate(self % in_proj_input(self % sequence_length, self % model_dimension)) + allocate(self % out_proj_input(self % sequence_length, self % hidden_size)) + + allocate(self % output(self % sequence_length, self % model_dimension)) + + allocate(self % gradient, mold=self % in_proj % gradient) + end subroutine init + + pure module subroutine forward(self, input) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + integer :: i + + self % in_proj_input = input + call self % in_proj % forward(input) + + do concurrent(i = 1: self % sequence_length) + self % 
out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :)) + end do + + call self % out_proj % forward(self % out_proj_input) + self % output = self % out_proj % output + end subroutine forward + + pure module subroutine backward(self, input, gradient) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: input(:, :) + real, intent(in) :: gradient(:, :) + integer :: i + + call self % out_proj % backward(self % out_proj_input, gradient) + ! d_output/d_activation = d_output/d_output_proj * d/d_activation + do concurrent(i = 1: self % sequence_length) + self % out_proj % gradient(i, :) = & + self % out_proj % gradient(i, :) & + * (self % activation % eval_1d_prime(self % in_proj % output(i, :))) + end do + call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient) + + self % gradient = self % in_proj % gradient + end subroutine backward + + elemental module function get_num_params(self) result(num_params) + class(fc2d_layer), intent(in) :: self + integer :: num_params + + num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params() + end function get_num_params + + module function get_params(self) result(params) + class(fc2d_layer), intent(in) :: self + real, allocatable :: params(:) + + params = [& + self % in_proj % weights,& + self % out_proj % weights,& + self % in_proj % biases,& + self % out_proj % biases& + ] + end function get_params + + module function get_gradients(self) result(gradients) + class(fc2d_layer), intent(in), target :: self + real, allocatable :: gradients(:) + + gradients = [ & + self % in_proj % dw,& + self % out_proj % dw,& + self % in_proj % db,& + self % out_proj % db& + ] + end function get_gradients + + module subroutine set_params(self, params) + class(fc2d_layer), intent(in out) :: self + real, intent(in) :: params(:) + + ! 
check if the number of parameters is correct + if (size(params) /= self % get_num_params()) then + error stop 'Error: number of parameters does not match' + end if + + ! FIXME: looks clumsy, better ideas? + associate (transformation => self % model_dimension * self % hidden_size) + self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights)) + self % out_proj % weights = reshape(& + params(transformation + 1: 2 * transformation),& + shape(self % out_proj % weights)& + ) + self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size) + self % out_proj % biases = params(& + 2 * transformation + self % hidden_size + 1: & + 2 * transformation + self % hidden_size + self % model_dimension& + ) + end associate + end subroutine set_params +end submodule nf_fc2d_layer_submodule diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90 index 62e709cc..64f6df18 100644 --- a/test/test_fc2d_layer.f90 +++ b/test/test_fc2d_layer.f90 @@ -73,7 +73,12 @@ end function allclose subroutine init_weigths(fc) type(fc2d_layer) :: fc fc % in_proj % weights = reshape(& - [0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.5, 0.1, 0.2, 0.4, 0.5, 0.1, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4, 0.5],& + [& + 0.1, 0.2, 0.3, 0.4, 0.1,& + 0.2, 0.3, 0.5, 0.1, 0.2,& + 0.4, 0.5, 0.1, 0.3, 0.4,& + 0.5, 0.2, 0.3, 0.4, 0.5& + ],& [4, 5]& ) fc % in_proj % biases = 0.11 @@ -136,7 +141,7 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr gradient_shape = shape(fc % gradient) if (.not. all(gradient_shape.eq.expected_gradient_shape)) then ok = .false. - write(stderr, '(aa)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name() + write(stderr, '(aa)') 'backward returned incorrect gradient shape.. failed for', fc % activation % get_name() end if gradient_flat = reshape(fc % gradient, shape(gradient_flat)) if (.not. 
allclose(gradient_flat, expected_gradient_flat)) then From 1f57149d8c05da31e84527650a7abc83881a68ac Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 24 Feb 2025 23:22:21 +0400 Subject: [PATCH 5/8] fc2d_layer: make output size variable --- src/nf/nf_fc2d_layer.f90 | 6 ++--- src/nf/nf_fc2d_layer_submodule.f90 | 34 +++++++++++++++------------- test/test_fc2d_layer.f90 | 36 ++++++++++++++++++++++++++---- 3 files changed, 53 insertions(+), 23 deletions(-) diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90 index 78288457..fc30bc37 100644 --- a/src/nf/nf_fc2d_layer.f90 +++ b/src/nf/nf_fc2d_layer.f90 @@ -12,7 +12,7 @@ module nf_fc2d_layer type, extends(base_layer) :: fc2d_layer !! Fully Connected 2D Layer !! Two Linear layers with an activation function in between - integer :: sequence_length, hidden_size, model_dimension + integer :: sequence_length, model_dimension, hidden_size, output_size type(linear2d_layer) :: in_proj type(linear2d_layer) :: out_proj @@ -36,9 +36,9 @@ module nf_fc2d_layer end type fc2d_layer interface fc2d_layer - module function fc2d_layer_cons(hidden_size, activation) result(res) + module function fc2d_layer_cons(hidden_size, output_size, activation) result(res) !! This function returns the `fc2d_layer` instance. - integer, intent(in) :: hidden_size + integer, intent(in) :: hidden_size, output_size class(activation_function), intent(in) :: activation type(fc2d_layer) :: res end function fc2d_layer_cons diff --git a/src/nf/nf_fc2d_layer_submodule.f90 b/src/nf/nf_fc2d_layer_submodule.f90 index 60aa9c7a..83bf5866 100644 --- a/src/nf/nf_fc2d_layer_submodule.f90 +++ b/src/nf/nf_fc2d_layer_submodule.f90 @@ -6,13 +6,14 @@ implicit none contains - module function fc2d_layer_cons(hidden_size, activation) result(res) + module function fc2d_layer_cons(hidden_size, output_size, activation) result(res) !! This function returns the `fc2d_layer` instance. 
- integer, intent(in) :: hidden_size + integer, intent(in) :: hidden_size, output_size class(activation_function), intent(in) :: activation type(fc2d_layer) :: res res % hidden_size = hidden_size + res % output_size = output_size res % activation_name = activation % get_name() ! FIXME: implement correct derivative for `softmax` if (res % activation_name == 'softmax') then @@ -36,13 +37,13 @@ module subroutine init(self, input_shape) self % in_proj = linear2d_layer(self % hidden_size) call self % in_proj % init([self % sequence_length, self % model_dimension]) - self % out_proj = linear2d_layer(self % model_dimension) + self % out_proj = linear2d_layer(self % output_size) call self % out_proj % init([self % sequence_length, self % hidden_size]) allocate(self % in_proj_input(self % sequence_length, self % model_dimension)) allocate(self % out_proj_input(self % sequence_length, self % hidden_size)) - allocate(self % output(self % sequence_length, self % model_dimension)) + allocate(self % output(self % sequence_length, self % output_size)) allocate(self % gradient, mold=self % in_proj % gradient) end subroutine init @@ -115,6 +116,7 @@ end function get_gradients module subroutine set_params(self, params) class(fc2d_layer), intent(in out) :: self real, intent(in) :: params(:) + integer :: i, j, window ! check if the number of parameters is correct if (size(params) /= self % get_num_params()) then @@ -122,17 +124,17 @@ module subroutine set_params(self, params) end if ! FIXME: looks clumsy, better ideas? 
- associate (transformation => self % model_dimension * self % hidden_size) - self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights)) - self % out_proj % weights = reshape(& - params(transformation + 1: 2 * transformation),& - shape(self % out_proj % weights)& - ) - self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size) - self % out_proj % biases = params(& - 2 * transformation + self % hidden_size + 1: & - 2 * transformation + self % hidden_size + self % model_dimension& - ) - end associate + i = 1 + j = self % model_dimension * self % hidden_size + self % in_proj % weights = reshape(params(i: j), [self % model_dimension, self % hidden_size]) + i = j + 1 + j = i + self % hidden_size * self % output_size - 1 + self % out_proj % weights = reshape(params(i: j), [self % hidden_size, self % output_size]) + i = j + 1 + j = i + self % hidden_size - 1 + self % in_proj % biases = params(i: j) + i = j + 1 + j = i + self % output_size - 1 + self % out_proj % biases = params(i: j) end subroutine set_params end submodule nf_fc2d_layer_submodule diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90 index 64f6df18..2b22ee70 100644 --- a/test/test_fc2d_layer.f90 +++ b/test/test_fc2d_layer.f90 @@ -1,7 +1,7 @@ program test_fc2d_layer use iso_fortran_env, only: stderr => error_unit use nf_fc2d_layer, only: fc2d_layer - use nf, only: activation_function, relu, tanhf, sigmoid, softplus, sgd + use nf, only: activation_function, relu, celu, tanhf, sigmoid, softplus, sgd implicit none logical :: ok = .true. 
@@ -12,6 +12,7 @@ program test_fc2d_layer type(fc2d_layer) :: fc call test_fc2d_layer_forward(ok, sample_input) + call test_fc2d_layer_forward_different_shape(ok, sample_input) call test_fc2d_layer_backward(& ok, sample_input, sample_gradient,& activation=relu(),& @@ -100,7 +101,7 @@ subroutine test_fc2d_layer_forward(ok, input) 1.509, 1.5594, 3.4098& ] - fc = fc2d_layer(hidden_size=5, activation=relu()) + fc = fc2d_layer(hidden_size=5, output_size=4, activation=relu()) call fc % init([3, 4]) call init_weigths(fc) @@ -118,6 +119,33 @@ subroutine test_fc2d_layer_forward(ok, input) end if end subroutine test_fc2d_layer_forward + subroutine test_fc2d_layer_forward_different_shape(ok, input) + logical, intent(in out) :: ok + real, intent(in) :: input(3, 4) + type(fc2d_layer) :: fc + real :: output_shape(2) + real :: output_flat(3) + real :: expected_shape(2) = [3, 1] + real :: expected_output_flat(3) = [1.509, 1.5594, 3.4098] + + fc = fc2d_layer(hidden_size=5, output_size=1, activation=celu()) + call fc % init([3, 4]) + call init_weigths(fc) + + call fc % forward(input) + + output_shape = shape(fc % output) + if (.not. all(output_shape.eq.expected_shape)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect shape.. failed' + end if + output_flat = reshape(fc % output, shape(output_flat)) + if (.not. allclose(output_flat, expected_output_flat)) then + ok = .false. + write(stderr, '(a)') 'forward returned incorrect values.. 
failed' + end if + end subroutine test_fc2d_layer_forward_different_shape + subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gradient_flat) logical, intent(in out) :: ok real, intent(in) :: input(3, 4) @@ -131,7 +159,7 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr integer :: expected_gradient_shape(2) = [3, 4] real :: gradient_flat(12) - fc = fc2d_layer(hidden_size=5, activation=activation) + fc = fc2d_layer(hidden_size=5, output_size=4, activation=activation) call fc % init([3, 4]) call init_weigths(fc) @@ -167,7 +195,7 @@ subroutine test_fc2d_layer_update_gradients(ok, input, gradient) -1.7491575, -0.79099315, -3.4819508& ] - fc = fc2d_layer(hidden_size=5, activation=softplus()) + fc = fc2d_layer(hidden_size=5, output_size=4, activation=softplus()) call fc % init([3, 4]) call init_weigths(fc) From f8fb9773e2284e2c10d7b1d500abdf474e10486b Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 24 Feb 2025 23:44:31 +0400 Subject: [PATCH 6/8] fc2d_layer: plumbing --- src/nf.f90 | 3 +- src/nf/nf_layer_constructors.f90 | 26 ++++++----- src/nf/nf_layer_constructors_submodule.f90 | 10 +++++ src/nf/nf_layer_submodule.f90 | 52 ++++++++++++++++++++++ src/nf/nf_network_submodule.f90 | 3 ++ 5 files changed, 83 insertions(+), 11 deletions(-) diff --git a/src/nf.f90 b/src/nf.f90 index 39f67ea3..53a9d7ed 100644 --- a/src/nf.f90 +++ b/src/nf.f90 @@ -11,7 +11,8 @@ module nf linear2d, & maxpool2d, & reshape, & - self_attention + self_attention, & + fc2d use nf_loss, only: mse, quadratic use nf_metrics, only: corr, maxabs use nf_network, only: network diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90 index db60cf0f..56959a9d 100644 --- a/src/nf/nf_layer_constructors.f90 +++ b/src/nf/nf_layer_constructors.f90 @@ -17,7 +17,8 @@ module nf_layer_constructors linear2d, & maxpool2d, & reshape, & - self_attention + self_attention, & + fc2d interface input @@ -222,16 +223,21 @@ module 
function linear2d(out_features) result(res) !! Resulting layer instance end function linear2d - module function self_attention(num_heads) result(res) - !! Rank-2 (sequence_length, out_features) self attention constructor. - !! sequence_length and model_dimension are determined at layer initialization, based on the - !! output shape of the previous layer. - integer, intent(in) :: num_heads - !! Number of attention heads - type(layer) :: res - !! Resulting layer instance - end function self_attention + module function self_attention(num_heads) result(res) + !! Rank-2 (sequence_length, out_features) self attention constructor. + !! sequence_length and model_dimension are determined at layer initialization, based on the + !! output shape of the previous layer. + integer, intent(in) :: num_heads + !! Number of attention heads + type(layer) :: res + !! Resulting layer instance + end function self_attention + module function fc2d(hidden_size, output_size, activation) result(res) + integer, intent(in) :: hidden_size, output_size + class(activation_function), intent(in) :: activation + type(layer) :: res + end function fc2d end interface end module nf_layer_constructors diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 9e5322c1..40802712 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -12,6 +12,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_fc2d_layer, only: fc2d_layer use nf_activation, only: activation_function, relu, sigmoid implicit none @@ -179,4 +180,13 @@ module function self_attention(num_heads) result(res) allocate(res % p, source=self_attention_layer(num_heads)) end function self_attention + module function fc2d(hidden_size, output_size, activation) result(res) + integer, intent(in) :: hidden_size, output_size + class(activation_function), 
intent(in) :: activation + type(layer) :: res + + res % name = 'fc2d' + allocate(res % p, source=fc2d_layer(hidden_size, output_size, activation)) + end function fc2d + end submodule nf_layer_constructors_submodule diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90 index ecdeb41d..454cb530 100644 --- a/src/nf/nf_layer_submodule.f90 +++ b/src/nf/nf_layer_submodule.f90 @@ -12,6 +12,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_fc2d_layer, only: fc2d_layer use nf_optimizers, only: optimizer_base_type contains @@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(fc2d_layer) + call this_layer % backward(prev_layer % output, gradient) end select end select @@ -84,6 +87,8 @@ pure module subroutine backward_2d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(fc2d_layer) + call this_layer % backward(prev_layer % output, gradient) end select type is(self_attention_layer) @@ -95,6 +100,21 @@ pure module subroutine backward_2d(self, previous, gradient) call this_layer % backward(prev_layer % output, gradient) type is(self_attention_layer) call this_layer % backward(prev_layer % output, gradient) + type is(fc2d_layer) + call this_layer % backward(prev_layer % output, gradient) + end select + + type is(fc2d_layer) + + select type(prev_layer => previous % p) + type is(input2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(linear2d_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(self_attention_layer) + call this_layer % backward(prev_layer % output, gradient) + type is(fc2d_layer) 
+ call this_layer % backward(prev_layer % output, gradient) end select end select @@ -234,6 +254,8 @@ module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(linear2d_layer) call this_layer % forward(prev_layer % output) + type is(fc2d_layer) + call this_layer % forward(prev_layer % output) end select type is(reshape3d_layer) @@ -258,6 +280,8 @@ module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(self_attention_layer) call this_layer % forward(prev_layer % output) + type is(fc2d_layer) + call this_layer % forward(prev_layer % output) end select type is(self_attention_layer) @@ -270,8 +294,23 @@ module subroutine forward(self, input) call this_layer % forward(prev_layer % output) type is(self_attention_layer) call this_layer % forward(prev_layer % output) + type is(fc2d_layer) + call this_layer % forward(prev_layer % output) end select + type is(fc2d_layer) + + ! Upstream layers permitted: input2d, linear2d, self_attention, fc2d + select type(prev_layer => input % p) + type is(input2d_layer) + call this_layer % forward(prev_layer % output) + type is(linear2d_layer) + call this_layer % forward(prev_layer % output) + type is(self_attention_layer) + call this_layer % forward(prev_layer % output) + type is(fc2d_layer) + call this_layer % forward(prev_layer % output) + end select end select end subroutine forward @@ -311,6 +350,8 @@ pure module subroutine get_output_2d(self, output) allocate(output, source=this_layer % output) type is(self_attention_layer) allocate(output, source=this_layer % output) + type is(fc2d_layer) + allocate(output, source=this_layer % output) class default error stop '2-d output can only be read from an input2d or linear2d layer.'
@@ -367,6 +408,8 @@ impure elemental module subroutine init(self, input) self % layer_shape = shape(this_layer % output) type is(self_attention_layer) self % layer_shape = shape(this_layer % output) + type is(fc2d_layer) + self % layer_shape = shape(this_layer % output) type is(maxpool2d_layer) self % layer_shape = shape(this_layer % output) end select @@ -425,6 +468,8 @@ elemental module function get_num_params(self) result(num_params) num_params = this_layer % get_num_params() type is (self_attention_layer) num_params = this_layer % get_num_params() + type is (fc2d_layer) + num_params = this_layer % get_num_params() class default error stop 'Unknown layer type.' end select @@ -458,6 +503,8 @@ module function get_params(self) result(params) params = this_layer % get_params() type is (self_attention_layer) params = this_layer % get_params() + type is (fc2d_layer) + params = this_layer % get_params() class default error stop 'Unknown layer type.' end select @@ -491,6 +538,8 @@ module function get_gradients(self) result(gradients) gradients = this_layer % get_gradients() type is (self_attention_layer) gradients = this_layer % get_gradients() + type is (fc2d_layer) + gradients = this_layer % get_gradients() class default error stop 'Unknown layer type.' end select @@ -549,6 +598,9 @@ module subroutine set_params(self, params) type is (self_attention_layer) call this_layer % set_params(params) + type is (fc2d_layer) + call this_layer % set_params(params) + type is (maxpool2d_layer) ! No parameters to set. 
write(stderr, '(a)') 'Warning: calling set_params() ' & diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index f344c5c5..beb38722 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -11,6 +11,7 @@ use nf_reshape_layer, only: reshape3d_layer use nf_linear2d_layer, only: linear2d_layer use nf_self_attention_layer, only: self_attention_layer + use nf_fc2d_layer, only: fc2d_layer use nf_layer, only: layer use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape use nf_loss, only: quadratic @@ -163,6 +164,8 @@ module subroutine backward(self, output, loss) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(self_attention_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(fc2d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if From 6753ef43c99f335dafb626cba8a3777e87e7f622 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Mon, 24 Feb 2025 23:46:08 +0400 Subject: [PATCH 7/8] fc2d_layer: add example for 2d mlp --- example/simple_2d_mlp.f90 | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 example/simple_2d_mlp.f90 diff --git a/example/simple_2d_mlp.f90 b/example/simple_2d_mlp.f90 new file mode 100644 index 00000000..6e20e0cc --- /dev/null +++ b/example/simple_2d_mlp.f90 @@ -0,0 +1,35 @@ +program simple + use nf, only: dense, fc2d, flatten, linear2d, input, network, sgd, relu, tanhf + implicit none + type(network) :: net + real, allocatable :: x(:, :), y(:) + integer, parameter :: num_iterations = 25 + integer :: n + + print '("Simple")' + print '(60("="))' + + net = network([ & + input(4, 5), & + fc2d(3, 2, activation=relu()), & + flatten(), & + dense(4, activation=tanhf()) & + ]) + + call net % print_info() + + allocate(x(4, 5)) + call random_number(x) + y = [0.123456, 0.246802, 0.9, 0.001] + + do n = 0, 
num_iterations + + call net % forward(x) + call net % backward(y) + call net % update(optimizer=sgd(learning_rate=0.05)) + + if (mod(n, 5) == 0) print *, n, net % predict(x) + + end do + +end program simple From e7251f58442383ab1b64f2a80925fd0cd6a2dc78 Mon Sep 17 00:00:00 2001 From: Mikhail Voronov Date: Tue, 25 Feb 2025 01:09:51 +0400 Subject: [PATCH 8/8] fc2d_layer: update cmake --- CMakeLists.txt | 2 ++ example/CMakeLists.txt | 1 + test/CMakeLists.txt | 1 + 3 files changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1bf2231..a28b102e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,8 @@ add_library(neural-fortran src/nf/nf_datasets_mnist_submodule.f90 src/nf/nf_dense_layer.f90 src/nf/nf_dense_layer_submodule.f90 + src/nf/nf_fc2d_layer.f90 + src/nf/nf_fc2d_layer_submodule.f90 src/nf/nf_flatten_layer.f90 src/nf/nf_flatten_layer_submodule.f90 src/nf/nf_input1d_layer.f90 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index f4b706b8..fc5dc595 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -7,6 +7,7 @@ foreach(execid sine quadratic mha_simple + simple_2d_mlp ) add_executable(${execid} ${execid}.f90) target_link_libraries(${execid} PRIVATE diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 741e9930..9f1a9142 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,6 +12,7 @@ foreach(execid insert_flatten reshape_layer multihead_attention_layer + fc2d_layer dense_network get_set_network_params conv2d_network