From fcb4d8600a27b7558782e56c383b48e35646a35e Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Sun, 23 Feb 2025 20:48:33 +0400
Subject: [PATCH 1/8] fc2d_layer: initial forward implementation

---
 src/nf/nf_fc2d_layer.f90 | 95 ++++++++++++++++++++++++++++++++++++++++
 test/test_fc2d_layer.f90 | 67 ++++++++++++++++++++++++++++
 2 files changed, 162 insertions(+)
 create mode 100644 src/nf/nf_fc2d_layer.f90
 create mode 100644 test/test_fc2d_layer.f90

diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90
new file mode 100644
index 00000000..ab906988
--- /dev/null
+++ b/src/nf/nf_fc2d_layer.f90
@@ -0,0 +1,95 @@
+module nf_fc2d_layer
+  use iso_fortran_env, only: stderr => error_unit
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+  use nf_linear2d_layer, only: linear2d_layer
+
+  implicit none
+
+  private
+  public :: fc2d_layer
+
+  type, extends(base_layer) :: fc2d_layer
+    integer :: sequence_length, hidden_size, model_dimension
+
+    type(linear2d_layer) :: in_proj
+    type(linear2d_layer) :: out_proj
+
+    class(activation_function), allocatable :: activation
+
+    real, allocatable :: gradient(:, :)
+    real, allocatable :: in_proj_input(:, :)
+    real, allocatable :: out_proj_input(:, :)
+
+    real, allocatable :: output(:, :)
+
+  contains
+!    procedure :: backward
+    procedure :: forward
+!    procedure :: get_num_params
+!    procedure :: get_params
+!    procedure :: get_gradients
+!    procedure :: set_params
+    procedure :: init
+  end type fc2d_layer
+
+  interface fc2d_layer
+    module function fc2d_layer_cons(hidden_size, activation) result(res)
+      !! This function returns the `fc2d_layer` instance.
+      integer, intent(in) :: hidden_size
+      class(activation_function), intent(in) :: activation
+      type(fc2d_layer) :: res
+    end function fc2d_layer_cons
+  end interface fc2d_layer
+
+contains
+  module function fc2d_layer_cons(hidden_size, activation) result(res)
+    !! This function returns the `fc2d_layer` instance.
+    integer, intent(in) :: hidden_size
+    class(activation_function), intent(in) :: activation
+    type(fc2d_layer) :: res
+
+    res % hidden_size = hidden_size
+    res % activation_name = activation % get_name()
+    allocate(res % activation, source = activation)
+  end function fc2d_layer_cons
+
+  module subroutine init(self, input_shape)
+    class(fc2d_layer), intent(in out) :: self
+    integer, intent(in) :: input_shape(:)
+
+    if (size(input_shape) /= 2) then
+      error stop "fc2d_layer accepts 2D input"
+    end if
+
+    self % sequence_length = input_shape(1)
+    self % model_dimension = input_shape(2)
+
+    self % in_proj = linear2d_layer(self % hidden_size)
+    call self % in_proj % init([self % sequence_length, self % model_dimension])
+
+    self % out_proj = linear2d_layer(self % model_dimension)
+    call self % out_proj % init([self % sequence_length, self % hidden_size])
+
+    allocate(self % in_proj_input(self % sequence_length, self % model_dimension))
+    allocate(self % out_proj_input(self % sequence_length, self % hidden_size))
+
+    allocate(self % output(self % sequence_length, self % model_dimension))
+  end subroutine init
+
+  pure module subroutine forward(self, input)
+    class(fc2d_layer), intent(in out) :: self
+    real, intent(in) :: input(:, :)
+    integer :: i
+
+    self % in_proj_input = input
+    call self % in_proj % forward(input)
+
+    do concurrent(i = 1: self % sequence_length)
+      self % out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :))
+    end do
+
+    call self % out_proj % forward(self % out_proj_input)
+    self % output = self % out_proj % output
+  end subroutine forward
+end module nf_fc2d_layer
diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90
new file mode 100644
index 00000000..9a93eaad
--- /dev/null
+++ b/test/test_fc2d_layer.f90
@@ -0,0 +1,67 @@
+program test_fc2d_layer
+  use iso_fortran_env, only: stderr => error_unit
+  use nf_fc2d_layer, only: fc2d_layer
+  use nf, only: relu
+  implicit none
+
+  logical :: ok = .true.
+  real :: sample_input(3, 4) = reshape(&
+      [0.0, -10.1, 0.2, 10.3, 0.4, 10.5, -0.6, 10.7, 10.8, 0.9, 0.11, 0.12],&
+      [3, 4])
+  real :: sample_gradient(3, 4) = reshape([0.1, 3., 2., 0.1, 3., 3., 0.1, 2., 0.1, 3., 0.1, 3.], [3, 4])
+  type(fc2d_layer) :: fc
+
+  fc = fc2d_layer(hidden_size=5, activation=relu())
+  call fc % init([3, 4])
+  fc % in_proj % weights = 0.1
+  fc % in_proj % biases = 0.11
+  fc % out_proj % weights = 0.1
+  fc % out_proj % biases = 0.11
+
+  call test_fc2d_layer_forward(fc, ok, sample_input)
+
+  if (ok) then
+    print '(a)', 'test_fc2d_layer: All tests passed.'
+  else
+    write(stderr, '(a)') 'test_fc2d_layer: One or more tests failed.'
+    stop 1
+  end if
+
+contains
+  function allclose(x, y) result(res)
+    real, intent(in) :: x(:)
+    real, intent(in) :: y(:)
+    logical :: res
+
+    res = all(abs(x - y) <= (1e-06 + 1e-05 * abs(y)))
+  end function allclose
+
+  subroutine test_fc2d_layer_forward(fc, ok, input)
+    type(fc2d_layer), intent(in out) :: fc
+    logical, intent(in out) :: ok
+    real, intent(in) :: input(3, 4)
+    real :: output_shape(2)
+    real :: output_flat(12)
+    real :: expected_shape(2) = [3, 4]
+    real :: expected_output_flat(12) = [&
+        0.695, 0.2205, 1.246,&
+        0.695, 0.2205, 1.246,&
+        0.695, 0.2205, 1.246,&
+        0.695, 0.2205, 1.246&
+    ]
+
+    call fc % forward(input)
+
+    output_shape = shape(fc % output)
+    if (.not. all(output_shape.eq.expected_shape)) then
+      ok = .false.
+      write(stderr, '(a)') 'forward returned incorrect shape.. failed'
+    end if
+    output_flat = reshape(fc % output, shape(output_flat))
+    if (.not. allclose(output_flat, expected_output_flat)) then
+      ok = .false.
+      write(stderr, '(a)') 'forward returned incorrect values.. failed'
+    end if
+  end subroutine test_fc2d_layer_forward
+
+end program test_fc2d_layer
\ No newline at end of file

From 3ff960889ddb823cfcfd9bc317a2cd1efec1cf0d Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Mon, 24 Feb 2025 20:23:40 +0400
Subject: [PATCH 2/8] fc2d_layer: backward pass implementation

---
 src/nf/nf_fc2d_layer.f90 |  25 ++++++++-
 test/test_fc2d_layer.f90 | 111 +++++++++++++++++++++++++++++++++------
 2 files changed, 119 insertions(+), 17 deletions(-)

diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90
index ab906988..77221ff3 100644
--- a/src/nf/nf_fc2d_layer.f90
+++ b/src/nf/nf_fc2d_layer.f90
@@ -24,7 +24,7 @@ module nf_fc2d_layer
     real, allocatable :: output(:, :)
 
   contains
-!    procedure :: backward
+    procedure :: backward
     procedure :: forward
 !    procedure :: get_num_params
 !    procedure :: get_params
@@ -51,6 +51,11 @@ module function fc2d_layer_cons(hidden_size, activation) result(res)
 
     res % hidden_size = hidden_size
     res % activation_name = activation % get_name()
+    ! FIXME: implement correct derivative for `softmax`
+    if (res % activation_name == 'softmax') then
+      write(stderr, '(a)') '`softmax` activation is temporarily unavailable'
+      error stop 1
+    end if
     allocate(res % activation, source = activation)
   end function fc2d_layer_cons
 
@@ -75,6 +80,8 @@ module subroutine init(self, input_shape)
     allocate(self % out_proj_input(self % sequence_length, self % hidden_size))
 
     allocate(self % output(self % sequence_length, self % model_dimension))
+
+    allocate(self % gradient, mold=self % in_proj % gradient)
   end subroutine init
 
   pure module subroutine forward(self, input)
@@ -92,4 +99,20 @@ pure module subroutine forward(self, input)
     call self % out_proj % forward(self % out_proj_input)
     self % output = self % out_proj % output
   end subroutine forward
+
+  pure module subroutine backward(self, input, gradient)
+    class(fc2d_layer), intent(in out) :: self
+    real, intent(in) :: input(:, :)
+    real, intent(in) :: gradient(:, :)
+    integer :: i
+
+    call self % out_proj % backward(self % out_proj_input, gradient)
+    do concurrent(i = 1: self % sequence_length)
+      self % out_proj % gradient(i, :) = self % out_proj % gradient(i, :) &
+      * (self % activation % eval_1d_prime(self % in_proj % output(i, :)))
+    end do
+    call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient)
+
+    self % gradient = self % in_proj % gradient
+  end subroutine backward
 end module nf_fc2d_layer
diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90
index 9a93eaad..37713dc1 100644
--- a/test/test_fc2d_layer.f90
+++ b/test/test_fc2d_layer.f90
@@ -1,7 +1,7 @@
 program test_fc2d_layer
   use iso_fortran_env, only: stderr => error_unit
   use nf_fc2d_layer, only: fc2d_layer
-  use nf, only: relu
+  use nf, only: activation_function, relu, tanhf, sigmoid, softplus
   implicit none
 
   logical :: ok = .true.
@@ -11,20 +11,53 @@ program test_fc2d_layer
   real :: sample_gradient(3, 4) = reshape([0.1, 3., 2., 0.1, 3., 3., 0.1, 2., 0.1, 3., 0.1, 3.], [3, 4])
   type(fc2d_layer) :: fc
 
-  fc = fc2d_layer(hidden_size=5, activation=relu())
-  call fc % init([3, 4])
-  fc % in_proj % weights = 0.1
-  fc % in_proj % biases = 0.11
-  fc % out_proj % weights = 0.1
-  fc % out_proj % biases = 0.11
-
-  call test_fc2d_layer_forward(fc, ok, sample_input)
+  call test_fc2d_layer_forward(ok, sample_input)
+  call test_fc2d_layer_backward(&
+    ok, sample_input, sample_gradient,&
+    activation=relu(),&
+    expected_gradient_flat=[&
+      0.198, 0.486, 0.486,&
+      0.396, 0.972, 0.972,&
+      0.594, 1.458, 1.458,&
+      0.792, 1.944, 1.944&
+    ]&
+  )
+  call test_fc2d_layer_backward(&
+    ok, sample_input, sample_gradient,&
+    activation=sigmoid(),&
+    expected_gradient_flat=[&
+      0.01068044, 0.02734236, 0.00086295,&
+      0.02136087, 0.05140798, 0.00172666,&
+      0.03357822, 0.07555774, 0.00266102,&
+      0.04567052, 0.10338347, 0.0038053&
+    ]&
+  )
+  call test_fc2d_layer_backward(&
+    ok, sample_input, sample_gradient,&
+    activation=tanhf(),&
+    expected_gradient_flat=[&
+      3.7096841e-03, 9.3461145e-03, 1.1113838e-05,&
+      7.4193683e-03, 1.6985621e-02, 2.2227676e-05,&
+      1.2096796e-02, 2.4647098e-02, 3.3862932e-05,&
+      1.6650427e-02, 3.4423053e-02, 5.0007438e-05&
+    ]&
+  )
+  call test_fc2d_layer_backward(&
+    ok, sample_input, sample_gradient,&
+    activation=softplus(),&
+    expected_gradient_flat=[&
+      0.18651924, 0.45662752, 0.48513436,&
+      0.37303847, 0.9168981, 0.9702679,&
+      0.5578177, 1.3770795, 1.4553307,&
+      0.7427467, 1.8331366, 1.9401824&
+    ]&
+  )
 
   if (ok) then
     print '(a)', 'test_fc2d_layer: All tests passed.'
   else
     write(stderr, '(a)') 'test_fc2d_layer: One or more tests failed.'
-    stop 1
+    error stop 1
   end if
 
 contains
@@ -36,20 +69,35 @@ function allclose(x, y) result(res)
     res = all(abs(x - y) <= (1e-06 + 1e-05 * abs(y)))
   end function allclose
 
-  subroutine test_fc2d_layer_forward(fc, ok, input)
-    type(fc2d_layer), intent(in out) :: fc
+  subroutine init_weigths(fc)
+    type(fc2d_layer) :: fc
+    fc % in_proj % weights = reshape(&
+        [0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.5, 0.1, 0.2, 0.4, 0.5, 0.1, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4, 0.5],&
+        [4, 5]&
+    )
+    fc % in_proj % biases = 0.11
+    fc % out_proj % weights = 0.1
+    fc % out_proj % biases = 0.11
+  end subroutine init_weigths
+
+  subroutine test_fc2d_layer_forward(ok, input)
     logical, intent(in out) :: ok
     real, intent(in) :: input(3, 4)
+    type(fc2d_layer) :: fc
     real :: output_shape(2)
     real :: output_flat(12)
     real :: expected_shape(2) = [3, 4]
     real :: expected_output_flat(12) = [&
-        0.695, 0.2205, 1.246,&
-        0.695, 0.2205, 1.246,&
-        0.695, 0.2205, 1.246,&
-        0.695, 0.2205, 1.246&
+        1.509, 1.5594, 3.4098,&
+        1.509, 1.5594, 3.4098,&
+        1.509, 1.5594, 3.4098,&
+        1.509, 1.5594, 3.4098&
     ]
 
+    fc = fc2d_layer(hidden_size=5, activation=relu())
+    call fc % init([3, 4])
+    call init_weigths(fc)
+
     call fc % forward(input)
 
     output_shape = shape(fc % output)
@@ -64,4 +112,35 @@ subroutine test_fc2d_layer_forward(fc, ok, input)
     end if
   end subroutine test_fc2d_layer_forward
 
+  subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gradient_flat)
+    logical, intent(in out) :: ok
+    real, intent(in) :: input(3, 4)
+    real, intent(in) :: gradient(3, 4)
+    class(activation_function), intent(in) :: activation
+    real, intent(in) :: expected_gradient_flat(12)
+
+    type(fc2d_layer) :: fc
+
+    integer :: gradient_shape(2)
+    integer :: expected_gradient_shape(2) = [3, 4]
+    real :: gradient_flat(12)
+
+    fc = fc2d_layer(hidden_size=5, activation=activation)
+    call fc % init([3, 4])
+    call init_weigths(fc)
+
+    call fc % forward(input)
+    call fc % backward(input, gradient)
+
+    gradient_shape = shape(fc % gradient)
+    if (.not. all(gradient_shape.eq.expected_gradient_shape)) then
+      ok = .false.
+      write(stderr, '(a) (a)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name()
+    end if
+    gradient_flat = reshape(fc % gradient, shape(gradient_flat))
+    if (.not. allclose(gradient_flat, expected_gradient_flat)) then
+      ok = .false.
+      write(stderr, '(aa)') 'backward returned incorrect gradient values.. failed for ', fc % activation % get_name()
+    end if
+  end subroutine test_fc2d_layer_backward
 end program test_fc2d_layer
\ No newline at end of file

From 07fb4d7bd58e5405043d9b12dc57af6aab68eecf Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Mon, 24 Feb 2025 22:03:20 +0400
Subject: [PATCH 3/8] fc2d_layer: parameters

---
 src/nf/nf_fc2d_layer.f90 | 64 +++++++++++++++++++++++++++++++++++++---
 test/test_fc2d_layer.f90 | 50 +++++++++++++++++++++++++++++--
 2 files changed, 107 insertions(+), 7 deletions(-)

diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90
index 77221ff3..fc7e9e34 100644
--- a/src/nf/nf_fc2d_layer.f90
+++ b/src/nf/nf_fc2d_layer.f90
@@ -26,10 +26,10 @@ module nf_fc2d_layer
   contains
     procedure :: backward
     procedure :: forward
-!    procedure :: get_num_params
-!    procedure :: get_params
-!    procedure :: get_gradients
-!    procedure :: set_params
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: get_gradients
+    procedure :: set_params
     procedure :: init
   end type fc2d_layer
 
@@ -115,4 +115,60 @@ pure module subroutine backward(self, input, gradient)
 
     self % gradient = self % in_proj % gradient
   end subroutine backward
+
+  elemental module function get_num_params(self) result(num_params)
+    class(fc2d_layer), intent(in) :: self
+    integer :: num_params
+
+    num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params()
+  end function get_num_params
+
+  module function get_params(self) result(params)
+    class(fc2d_layer), intent(in) :: self
+    real, allocatable :: params(:)
+
+    params = [&
+        self % in_proj % weights,&
+        self % out_proj % weights,&
+        self % in_proj % biases,&
+        self % out_proj % biases&
+    ]
+  end function get_params
+
+  module function get_gradients(self) result(gradients)
+    class(fc2d_layer), intent(in), target :: self
+    real, allocatable :: gradients(:)
+
+    gradients = [ &
+        self % in_proj % dw,&
+        self % out_proj % dw,&
+        self % in_proj % db,&
+        self % out_proj % db&
+    ]
+  end function get_gradients
+
+  module subroutine set_params(self, params)
+    class(fc2d_layer), intent(in out) :: self
+    real, intent(in) :: params(:)
+    integer :: i, j, window
+
+    ! check if the number of parameters is correct
+    if (size(params) /= self % get_num_params()) then
+      error stop 'Error: number of parameters does not match'
+    end if
+
+    ! FIXME: looks clumsy, better ideas?
+    associate (transformation => self % model_dimension * self % hidden_size)
+      self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights))
+      self % out_proj % weights = reshape(&
+          params(transformation + 1: 2 * transformation),&
+          shape(self % out_proj % weights)&
+      )
+      self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size)
+      self % out_proj % biases = params(&
+          2 * transformation + self % hidden_size + 1: &
+          2 * transformation + self % hidden_size + self % model_dimension&
+      )
+    end associate
+  end subroutine set_params
 end module nf_fc2d_layer
diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90
index 37713dc1..62e709cc 100644
--- a/test/test_fc2d_layer.f90
+++ b/test/test_fc2d_layer.f90
@@ -1,7 +1,7 @@
 program test_fc2d_layer
   use iso_fortran_env, only: stderr => error_unit
   use nf_fc2d_layer, only: fc2d_layer
-  use nf, only: activation_function, relu, tanhf, sigmoid, softplus
+  use nf, only: activation_function, relu, tanhf, sigmoid, softplus, sgd
   implicit none
 
   logical :: ok = .true.
@@ -52,6 +52,7 @@ program test_fc2d_layer
       0.7427467, 1.8331366, 1.9401824&
     ]&
   )
+  call test_fc2d_layer_update_gradients(ok, sample_input, sample_gradient)
 
   if (ok) then
     print '(a)', 'test_fc2d_layer: All tests passed.'
@@ -135,7 +136,7 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr
     gradient_shape = shape(fc % gradient)
     if (.not. all(gradient_shape.eq.expected_gradient_shape)) then
       ok = .false.
-      write(stderr, '(a) (a)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name()
+      write(stderr, '(aa)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name()
     end if
     gradient_flat = reshape(fc % gradient, shape(gradient_flat))
     if (.not. allclose(gradient_flat, expected_gradient_flat)) then
@@ -143,4 +144,47 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr
       write(stderr, '(aa)') 'backward returned incorrect gradient values.. failed for ', fc % activation % get_name()
     end if
   end subroutine test_fc2d_layer_backward
-end program test_fc2d_layer
\ No newline at end of file
+
+  subroutine test_fc2d_layer_update_gradients(ok, input, gradient)
+    logical, intent(in out) :: ok
+    real, intent(in) :: input(3, 4)
+    real, intent(in) :: gradient(3, 4)
+
+    type(fc2d_layer) :: fc
+    type(sgd) :: optim
+
+    real :: parameters(49)
+    real :: updated_output(12)
+    real :: expected_updated_output(12) = [&
+        -1.1192487, -0.51458186, -2.2737966,&
+        -1.7527609, -0.8190526, -3.5071785,&
+        0.36815026, 0.2097921, 0.6197472,&
+        -1.7491575, -0.79099315, -3.4819508&
+    ]
+
+    fc = fc2d_layer(hidden_size=5, activation=softplus())
+    call fc % init([3, 4])
+    call init_weigths(fc)
+
+    call fc % forward(input)
+    call fc % backward(input, gradient)
+
+    if (fc % get_num_params() /= 49) then
+      ok = .false.
+      write(stderr, '(a)') 'incorrect number of parameters.. failed'
+    end if
+
+    optim = SGD(learning_rate=0.01)
+    parameters = fc % get_params()
+    call optim % minimize(parameters, fc % get_gradients())
+    call fc % set_params(parameters)
+
+    call fc % forward(input)
+
+    updated_output = reshape(fc % output, [12])
+    if (.not. allclose(updated_output, expected_updated_output)) then
+      ok = .false.
+      write(stderr, '(a)') 'incorrect output after parameters update.. failed'
+    end if
+  end subroutine test_fc2d_layer_update_gradients
+end program test_fc2d_layer

From 7e7454bfe4036f5ac4e6ab0328a1b6594d82451b Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Mon, 24 Feb 2025 22:17:06 +0400
Subject: [PATCH 4/8] fc2d_layer: submodule, comments, style fixes

---
 src/nf/nf_fc2d_layer.f90           | 168 +++++++----------------------
 src/nf/nf_fc2d_layer_submodule.f90 | 138 ++++++++++++++++++++++++
 test/test_fc2d_layer.f90           |   9 +-
 3 files changed, 184 insertions(+), 131 deletions(-)
 create mode 100644 src/nf/nf_fc2d_layer_submodule.f90

diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90
index fc7e9e34..78288457 100644
--- a/src/nf/nf_fc2d_layer.f90
+++ b/src/nf/nf_fc2d_layer.f90
@@ -10,6 +10,8 @@ module nf_fc2d_layer
   public :: fc2d_layer
 
   type, extends(base_layer) :: fc2d_layer
+    !! Fully Connected 2D Layer
+    !! Two Linear layers with an activation function in between
     integer :: sequence_length, hidden_size, model_dimension
 
     type(linear2d_layer) :: in_proj
@@ -42,133 +44,41 @@ module function fc2d_layer_cons(hidden_size, activation) result(res)
     end function fc2d_layer_cons
   end interface fc2d_layer
 
-contains
-  module function fc2d_layer_cons(hidden_size, activation) result(res)
-    !! This function returns the `fc2d_layer` instance.
-    integer, intent(in) :: hidden_size
-    class(activation_function), intent(in) :: activation
-    type(fc2d_layer) :: res
-
-    res % hidden_size = hidden_size
-    res % activation_name = activation % get_name()
-    ! FIXME: implement correct derivative for `softmax`
-    if (res % activation_name == 'softmax') then
-      write(stderr, '(a)') '`softmax` activation is temporarily unavailable'
-      error stop 1
-    end if
-    allocate(res % activation, source = activation)
-  end function fc2d_layer_cons
-
-  module subroutine init(self, input_shape)
-    class(fc2d_layer), intent(in out) :: self
-    integer, intent(in) :: input_shape(:)
-
-    if (size(input_shape) /= 2) then
-      error stop "fc2d_layer accepts 2D input"
-    end if
-
-    self % sequence_length = input_shape(1)
-    self % model_dimension = input_shape(2)
-
-    self % in_proj = linear2d_layer(self % hidden_size)
-    call self % in_proj % init([self % sequence_length, self % model_dimension])
-
-    self % out_proj = linear2d_layer(self % model_dimension)
-    call self % out_proj % init([self % sequence_length, self % hidden_size])
-
-    allocate(self % in_proj_input(self % sequence_length, self % model_dimension))
-    allocate(self % out_proj_input(self % sequence_length, self % hidden_size))
-
-    allocate(self % output(self % sequence_length, self % model_dimension))
-
-    allocate(self % gradient, mold=self % in_proj % gradient)
-  end subroutine init
-
-  pure module subroutine forward(self, input)
-    class(fc2d_layer), intent(in out) :: self
-    real, intent(in) :: input(:, :)
-    integer :: i
-
-    self % in_proj_input = input
-    call self % in_proj % forward(input)
-
-    do concurrent(i = 1: self % sequence_length)
-      self % out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :))
-    end do
-
-    call self % out_proj % forward(self % out_proj_input)
-    self % output = self % out_proj % output
-  end subroutine forward
-
-  pure module subroutine backward(self, input, gradient)
-    class(fc2d_layer), intent(in out) :: self
-    real, intent(in) :: input(:, :)
-    real, intent(in) :: gradient(:, :)
-    integer :: i
-
-    call self % out_proj % backward(self % out_proj_input, gradient)
-    do concurrent(i = 1: self % sequence_length)
-      self % out_proj % gradient(i, :) = self % out_proj % gradient(i, :) &
-      * (self % activation % eval_1d_prime(self % in_proj % output(i, :)))
-    end do
-    call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient)
-
-    self % gradient = self % in_proj % gradient
-  end subroutine backward
-
-  elemental module function get_num_params(self) result(num_params)
-    class(fc2d_layer), intent(in) :: self
-    integer :: num_params
-
-    num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params()
-  end function get_num_params
-
-  module function get_params(self) result(params)
-    class(fc2d_layer), intent(in) :: self
-    real, allocatable :: params(:)
-
-    params = [&
-        self % in_proj % weights,&
-        self % out_proj % weights,&
-        self % in_proj % biases,&
-        self % out_proj % biases&
-    ]
-  end function get_params
-
-  module function get_gradients(self) result(gradients)
-    class(fc2d_layer), intent(in), target :: self
-    real, allocatable :: gradients(:)
-
-    gradients = [ &
-        self % in_proj % dw,&
-        self % out_proj % dw,&
-        self % in_proj % db,&
-        self % out_proj % db&
-    ]
-  end function get_gradients
-
-  module subroutine set_params(self, params)
-    class(fc2d_layer), intent(in out) :: self
-    real, intent(in) :: params(:)
-    integer :: i, j, window
-
-    ! check if the number of parameters is correct
-    if (size(params) /= self % get_num_params()) then
-      error stop 'Error: number of parameters does not match'
-    end if
-
-    ! FIXME: looks clumsy, better ideas?
-    associate (transformation => self % model_dimension * self % hidden_size)
-      self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights))
-      self % out_proj % weights = reshape(&
-          params(transformation + 1: 2 * transformation),&
-          shape(self % out_proj % weights)&
-      )
-      self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size)
-      self % out_proj % biases = params(&
-          2 * transformation + self % hidden_size + 1: &
-          2 * transformation + self % hidden_size + self % model_dimension&
-      )
-    end associate
-  end subroutine set_params
+  interface
+    module subroutine init(self, input_shape)
+      class(fc2d_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+
+    pure module subroutine forward(self, input)
+      class(fc2d_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      class(fc2d_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    elemental module function get_num_params(self) result(num_params)
+      class(fc2d_layer), intent(in) :: self
+      integer :: num_params
+    end function get_num_params
+
+    module function get_params(self) result(params)
+      class(fc2d_layer), intent(in) :: self
+      real, allocatable :: params(:)
+    end function get_params
+
+    module function get_gradients(self) result(gradients)
+      class(fc2d_layer), intent(in), target :: self
+      real, allocatable :: gradients(:)
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      class(fc2d_layer), intent(in out) :: self
+      real, intent(in) :: params(:)
+    end subroutine set_params
+  end interface
 end module nf_fc2d_layer
diff --git a/src/nf/nf_fc2d_layer_submodule.f90 b/src/nf/nf_fc2d_layer_submodule.f90
new file mode 100644
index 00000000..60aa9c7a
--- /dev/null
+++ b/src/nf/nf_fc2d_layer_submodule.f90
@@ -0,0 +1,138 @@
+submodule(nf_fc2d_layer) nf_fc2d_layer_submodule
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+  use nf_linear2d_layer, only: linear2d_layer
+
+  implicit none
+
+contains
+  module function fc2d_layer_cons(hidden_size, activation) result(res)
+    !! This function returns the `fc2d_layer` instance.
+    integer, intent(in) :: hidden_size
+    class(activation_function), intent(in) :: activation
+    type(fc2d_layer) :: res
+
+    res % hidden_size = hidden_size
+    res % activation_name = activation % get_name()
+    ! FIXME: implement correct derivative for `softmax`
+    if (res % activation_name == 'softmax') then
+      write(stderr, '(a)') '`softmax` activation is temporarily unavailable'
+      error stop 1
+    end if
+    allocate(res % activation, source = activation)
+  end function fc2d_layer_cons
+
+  module subroutine init(self, input_shape)
+    class(fc2d_layer), intent(in out) :: self
+    integer, intent(in) :: input_shape(:)
+
+    if (size(input_shape) /= 2) then
+      error stop "fc2d_layer accepts 2D input"
+    end if
+
+    self % sequence_length = input_shape(1)
+    self % model_dimension = input_shape(2)
+
+    self % in_proj = linear2d_layer(self % hidden_size)
+    call self % in_proj % init([self % sequence_length, self % model_dimension])
+
+    self % out_proj = linear2d_layer(self % model_dimension)
+    call self % out_proj % init([self % sequence_length, self % hidden_size])
+
+    allocate(self % in_proj_input(self % sequence_length, self % model_dimension))
+    allocate(self % out_proj_input(self % sequence_length, self % hidden_size))
+
+    allocate(self % output(self % sequence_length, self % model_dimension))
+
+    allocate(self % gradient, mold=self % in_proj % gradient)
+  end subroutine init
+
+  pure module subroutine forward(self, input)
+    class(fc2d_layer), intent(in out) :: self
+    real, intent(in) :: input(:, :)
+    integer :: i
+
+    self % in_proj_input = input
+    call self % in_proj % forward(input)
+
+    do concurrent(i = 1: self % sequence_length)
+      self % out_proj_input(i, :) = self % activation % eval_1d(self % in_proj % output(i, :))
+    end do
+
+    call self % out_proj % forward(self % out_proj_input)
+    self % output = self % out_proj % output
+  end subroutine forward
+
+  pure module subroutine backward(self, input, gradient)
+    class(fc2d_layer), intent(in out) :: self
+    real, intent(in) :: input(:, :)
+    real, intent(in) :: gradient(:, :)
+    integer :: i
+
+    call self % out_proj % backward(self % out_proj_input, gradient)
+    ! chain rule through the activation: scale each row of out_proj's gradient by activation'(in_proj output)
+    do concurrent(i = 1: self % sequence_length)
+      self % out_proj % gradient(i, :) = &
+          self % out_proj % gradient(i, :) &
+          * (self % activation % eval_1d_prime(self % in_proj % output(i, :)))
+    end do
+    call self % in_proj % backward(self % in_proj_input, self % out_proj % gradient)
+
+    self % gradient = self % in_proj % gradient
+  end subroutine backward
+
+  elemental module function get_num_params(self) result(num_params)
+    class(fc2d_layer), intent(in) :: self
+    integer :: num_params
+
+    num_params = self % in_proj % get_num_params() + self % out_proj % get_num_params()
+  end function get_num_params
+
+  module function get_params(self) result(params)
+    class(fc2d_layer), intent(in) :: self
+    real, allocatable :: params(:)
+
+    params = [&
+        self % in_proj % weights,&
+        self % out_proj % weights,&
+        self % in_proj % biases,&
+        self % out_proj % biases&
+    ]
+  end function get_params
+
+  module function get_gradients(self) result(gradients)
+    class(fc2d_layer), intent(in), target :: self
+    real, allocatable :: gradients(:)
+
+    gradients = [ &
+        self % in_proj % dw,&
+        self % out_proj % dw,&
+        self % in_proj % db,&
+        self % out_proj % db&
+    ]
+  end function get_gradients
+
+  module subroutine set_params(self, params)
+    class(fc2d_layer), intent(in out) :: self
+    real, intent(in) :: params(:)
+
+    ! check if the number of parameters is correct
+    if (size(params) /= self % get_num_params()) then
+      error stop 'Error: number of parameters does not match'
+    end if
+
+    ! FIXME: looks clumsy, better ideas?
+    associate (transformation => self % model_dimension * self % hidden_size)
+      self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights))
+      self % out_proj % weights = reshape(&
+          params(transformation + 1: 2 * transformation),&
+          shape(self % out_proj % weights)&
+      )
+      self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size)
+      self % out_proj % biases = params(&
+          2 * transformation + self % hidden_size + 1: &
+          2 * transformation + self % hidden_size + self % model_dimension&
+      )
+    end associate
+  end subroutine set_params
+end submodule nf_fc2d_layer_submodule
diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90
index 62e709cc..64f6df18 100644
--- a/test/test_fc2d_layer.f90
+++ b/test/test_fc2d_layer.f90
@@ -73,7 +73,12 @@ end function allclose
   subroutine init_weigths(fc)
     type(fc2d_layer) :: fc
     fc % in_proj % weights = reshape(&
-        [0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.5, 0.1, 0.2, 0.4, 0.5, 0.1, 0.3, 0.4, 0.5, 0.2, 0.3, 0.4, 0.5],&
+        [&
+            0.1, 0.2, 0.3, 0.4, 0.1,&
+            0.2, 0.3, 0.5, 0.1, 0.2,&
+            0.4, 0.5, 0.1, 0.3, 0.4,&
+            0.5, 0.2, 0.3, 0.4, 0.5&
+        ],&
         [4, 5]&
     )
     fc % in_proj % biases = 0.11
@@ -136,7 +141,7 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr
     gradient_shape = shape(fc % gradient)
     if (.not. all(gradient_shape.eq.expected_gradient_shape)) then
       ok = .false.
-      write(stderr, '(aa)') 'backward returned incorrect gradient shape.. failed', fc % activation % get_name()
+      write(stderr, '(aa)') 'backward returned incorrect gradient shape.. failed for', fc % activation % get_name()
     end if
     gradient_flat = reshape(fc % gradient, shape(gradient_flat))
     if (.not. allclose(gradient_flat, expected_gradient_flat)) then

From 1f57149d8c05da31e84527650a7abc83881a68ac Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Mon, 24 Feb 2025 23:22:21 +0400
Subject: [PATCH 5/8] fc2d_layer: make output size variable

---
 src/nf/nf_fc2d_layer.f90           |  6 ++---
 src/nf/nf_fc2d_layer_submodule.f90 | 34 +++++++++++++++-------------
 test/test_fc2d_layer.f90           | 36 ++++++++++++++++++++++++++----
 3 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/src/nf/nf_fc2d_layer.f90 b/src/nf/nf_fc2d_layer.f90
index 78288457..fc30bc37 100644
--- a/src/nf/nf_fc2d_layer.f90
+++ b/src/nf/nf_fc2d_layer.f90
@@ -12,7 +12,7 @@ module nf_fc2d_layer
   type, extends(base_layer) :: fc2d_layer
     !! Fully Connected 2D Layer
     !! Two Linear layers with an activation function in between
-    integer :: sequence_length, hidden_size, model_dimension
+    integer :: sequence_length, model_dimension, hidden_size, output_size
 
     type(linear2d_layer) :: in_proj
     type(linear2d_layer) :: out_proj
@@ -36,9 +36,9 @@ module nf_fc2d_layer
   end type fc2d_layer
 
   interface fc2d_layer
-    module function fc2d_layer_cons(hidden_size, activation) result(res)
+    module function fc2d_layer_cons(hidden_size, output_size, activation) result(res)
       !! This function returns the `fc2d_layer` instance.
-      integer, intent(in) :: hidden_size
+      integer, intent(in) :: hidden_size, output_size
       class(activation_function), intent(in) :: activation
       type(fc2d_layer) :: res
     end function fc2d_layer_cons
diff --git a/src/nf/nf_fc2d_layer_submodule.f90 b/src/nf/nf_fc2d_layer_submodule.f90
index 60aa9c7a..83bf5866 100644
--- a/src/nf/nf_fc2d_layer_submodule.f90
+++ b/src/nf/nf_fc2d_layer_submodule.f90
@@ -6,13 +6,14 @@
   implicit none
 
 contains
-  module function fc2d_layer_cons(hidden_size, activation) result(res)
+  module function fc2d_layer_cons(hidden_size, output_size, activation) result(res)
     !! This function returns the `fc2d_layer` instance.
-    integer, intent(in) :: hidden_size
+    integer, intent(in) :: hidden_size, output_size
     class(activation_function), intent(in) :: activation
     type(fc2d_layer) :: res
 
     res % hidden_size = hidden_size
+    res % output_size = output_size
     res % activation_name = activation % get_name()
     ! FIXME: implement correct derivative for `softmax`
     if (res % activation_name == 'softmax') then
@@ -36,13 +37,13 @@ module subroutine init(self, input_shape)
     self % in_proj = linear2d_layer(self % hidden_size)
     call self % in_proj % init([self % sequence_length, self % model_dimension])
 
-    self % out_proj = linear2d_layer(self % model_dimension)
+    self % out_proj = linear2d_layer(self % output_size)
     call self % out_proj % init([self % sequence_length, self % hidden_size])
 
     allocate(self % in_proj_input(self % sequence_length, self % model_dimension))
     allocate(self % out_proj_input(self % sequence_length, self % hidden_size))
 
-    allocate(self % output(self % sequence_length, self % model_dimension))
+    allocate(self % output(self % sequence_length, self % output_size))
 
     allocate(self % gradient, mold=self % in_proj % gradient)
   end subroutine init
@@ -115,6 +116,7 @@ end function get_gradients
   module subroutine set_params(self, params)
     class(fc2d_layer), intent(in out) :: self
     real, intent(in) :: params(:)
+    integer :: i, j
 
     ! check if the number of parameters is correct
     if (size(params) /= self % get_num_params()) then
@@ -122,17 +124,17 @@ module subroutine set_params(self, params)
     end if
 
     ! FIXME: looks clumsy, better ideas?
-    associate (transformation => self % model_dimension * self % hidden_size)
-      self % in_proj % weights = reshape(params(1: transformation), shape(self % in_proj % weights))
-      self % out_proj % weights = reshape(&
-          params(transformation + 1: 2 * transformation),&
-          shape(self % out_proj % weights)&
-      )
-      self % in_proj % biases = params(2 * transformation + 1: 2 * transformation + self % hidden_size)
-      self % out_proj % biases = params(&
-          2 * transformation + self % hidden_size + 1: &
-          2 * transformation + self % hidden_size + self % model_dimension&
-      )
-    end associate
+    i = 1  ! params(i:j) is the slice consumed by the next component; the slices are contiguous
+    j = self % model_dimension * self % hidden_size
+    self % in_proj % weights = reshape(params(i: j), [self % model_dimension, self % hidden_size])
+    i = j + 1
+    j = i + self % hidden_size * self % output_size - 1
+    self % out_proj % weights = reshape(params(i: j), [self % hidden_size, self % output_size])
+    i = j + 1  ! both weight matrices come first, the two bias vectors follow
+    j = i + self % hidden_size - 1
+    self % in_proj % biases = params(i: j)
+    i = j + 1
+    j = i + self % output_size - 1
+    self % out_proj % biases = params(i: j)
   end subroutine set_params
 end submodule nf_fc2d_layer_submodule
diff --git a/test/test_fc2d_layer.f90 b/test/test_fc2d_layer.f90
index 64f6df18..2b22ee70 100644
--- a/test/test_fc2d_layer.f90
+++ b/test/test_fc2d_layer.f90
@@ -1,7 +1,7 @@
 program test_fc2d_layer
   use iso_fortran_env, only: stderr => error_unit
   use nf_fc2d_layer, only: fc2d_layer
-  use nf, only: activation_function, relu, tanhf, sigmoid, softplus, sgd
+  use nf, only: activation_function, relu, celu, tanhf, sigmoid, softplus, sgd
   implicit none
 
   logical :: ok = .true.
@@ -12,6 +12,7 @@ program test_fc2d_layer
   type(fc2d_layer) :: fc
 
   call test_fc2d_layer_forward(ok, sample_input)
+  call test_fc2d_layer_forward_different_shape(ok, sample_input)
   call test_fc2d_layer_backward(&
     ok, sample_input, sample_gradient,&
     activation=relu(),&
@@ -100,7 +101,7 @@ subroutine test_fc2d_layer_forward(ok, input)
         1.509, 1.5594, 3.4098&
     ]
 
-    fc = fc2d_layer(hidden_size=5, activation=relu())
+    fc = fc2d_layer(hidden_size=5, output_size=4, activation=relu())
     call fc % init([3, 4])
     call init_weigths(fc)
 
@@ -118,6 +119,33 @@ subroutine test_fc2d_layer_forward(ok, input)
     end if
   end subroutine test_fc2d_layer_forward
 
+  subroutine test_fc2d_layer_forward_different_shape(ok, input)
+    logical, intent(in out) :: ok
+    real, intent(in) :: input(3, 4)
+    type(fc2d_layer) :: fc
+    integer :: output_shape(2)  ! shape() yields integers, so compare shapes as integers
+    real :: output_flat(3)
+    integer :: expected_shape(2) = [3, 1]
+    real :: expected_output_flat(3) = [1.509, 1.5594, 3.4098]
+    ! Forward pass where the output width (1) differs from the input width (4).
+    fc = fc2d_layer(hidden_size=5, output_size=1, activation=celu())
+    call fc % init([3, 4])
+    call init_weigths(fc)
+
+    call fc % forward(input)
+
+    output_shape = shape(fc % output)
+    if (.not. all(output_shape.eq.expected_shape)) then
+      ok = .false.
+      write(stderr, '(a)') 'forward returned incorrect shape.. failed'
+    end if
+    output_flat = reshape(fc % output, shape(output_flat))
+    if (.not. allclose(output_flat, expected_output_flat)) then
+      ok = .false.
+      write(stderr, '(a)') 'forward returned incorrect values.. failed'
+    end if
+  end subroutine test_fc2d_layer_forward_different_shape
+
   subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gradient_flat)
     logical, intent(in out) :: ok
     real, intent(in) :: input(3, 4)
@@ -131,7 +159,7 @@ subroutine test_fc2d_layer_backward(ok, input, gradient, activation, expected_gr
     integer :: expected_gradient_shape(2) = [3, 4]
     real :: gradient_flat(12)
 
-    fc = fc2d_layer(hidden_size=5, activation=activation)
+    fc = fc2d_layer(hidden_size=5, output_size=4, activation=activation)
     call fc % init([3, 4])
     call init_weigths(fc)
 
@@ -167,7 +195,7 @@ subroutine test_fc2d_layer_update_gradients(ok, input, gradient)
         -1.7491575, -0.79099315, -3.4819508&
     ]
 
-    fc = fc2d_layer(hidden_size=5, activation=softplus())
+    fc = fc2d_layer(hidden_size=5, output_size=4, activation=softplus())
     call fc % init([3, 4])
     call init_weigths(fc)
 

From f8fb9773e2284e2c10d7b1d500abdf474e10486b Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Mon, 24 Feb 2025 23:44:31 +0400
Subject: [PATCH 6/8] fc2d_layer: plumbing

---
 src/nf.f90                                 |  3 +-
 src/nf/nf_layer_constructors.f90           | 26 ++++++-----
 src/nf/nf_layer_constructors_submodule.f90 | 10 +++++
 src/nf/nf_layer_submodule.f90              | 52 ++++++++++++++++++++++
 src/nf/nf_network_submodule.f90            |  3 ++
 5 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/src/nf.f90 b/src/nf.f90
index 39f67ea3..53a9d7ed 100644
--- a/src/nf.f90
+++ b/src/nf.f90
@@ -11,7 +11,8 @@ module nf
     linear2d, &
     maxpool2d, &
     reshape, &
-    self_attention
+    self_attention, &
+    fc2d
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network
diff --git a/src/nf/nf_layer_constructors.f90 b/src/nf/nf_layer_constructors.f90
index db60cf0f..56959a9d 100644
--- a/src/nf/nf_layer_constructors.f90
+++ b/src/nf/nf_layer_constructors.f90
@@ -17,7 +17,8 @@ module nf_layer_constructors
     linear2d, &
     maxpool2d, &
     reshape, &
-    self_attention
+    self_attention, &
+    fc2d
 
   interface input
 
@@ -222,16 +223,21 @@ module function linear2d(out_features) result(res)
         !! Resulting layer instance
     end function linear2d
 
-  module function self_attention(num_heads) result(res)
-    !! Rank-2 (sequence_length, out_features) self attention constructor.
-    !! sequence_length and model_dimension are determined at layer initialization, based on the
-    !! output shape of the previous layer.
-    integer, intent(in) :: num_heads
-      !! Number of attention heads
-    type(layer) :: res
-      !! Resulting layer instance
-  end function self_attention
+    module function self_attention(num_heads) result(res)
+      !! Rank-2 (sequence_length, out_features) self attention constructor.
+      !! sequence_length and model_dimension are determined at layer initialization, based on the
+      !! output shape of the previous layer.
+      integer, intent(in) :: num_heads
+        !! Number of attention heads
+      type(layer) :: res
+        !! Resulting layer instance
+    end function self_attention
 
+    module function fc2d(hidden_size, output_size, activation) result(res)  !! Rank-2 fully connected (fc2d) layer constructor.
+      integer, intent(in) :: hidden_size, output_size  !! Output features of the first / second linear2d projection
+      class(activation_function), intent(in) :: activation  !! Activation applied between the two projections
+      type(layer) :: res  !! Resulting layer instance
+    end function fc2d
   end interface
 
 end module nf_layer_constructors
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 9e5322c1..40802712 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -12,6 +12,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_fc2d_layer, only: fc2d_layer
   use nf_activation, only: activation_function, relu, sigmoid
 
   implicit none
@@ -179,4 +180,13 @@ module function self_attention(num_heads) result(res)
     allocate(res % p, source=self_attention_layer(num_heads))
   end function self_attention
 
+  module function fc2d(hidden_size, output_size, activation) result(res)
+    integer, intent(in) :: hidden_size, output_size
+    class(activation_function), intent(in) :: activation
+    type(layer) :: res
+    ! Wrap a concrete fc2d_layer in the generic layer container under the name 'fc2d'.
+    res % name = 'fc2d'
+    allocate(res % p, source=fc2d_layer(hidden_size, output_size, activation))
+  end function fc2d
+
 end submodule nf_layer_constructors_submodule
diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90
index ecdeb41d..454cb530 100644
--- a/src/nf/nf_layer_submodule.f90
+++ b/src/nf/nf_layer_submodule.f90
@@ -12,6 +12,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_fc2d_layer, only: fc2d_layer
   use nf_optimizers, only: optimizer_base_type
 
 contains
@@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(fc2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select
 
     end select
@@ -84,6 +87,8 @@ pure module subroutine backward_2d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(fc2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select
 
       type is(self_attention_layer)
@@ -95,6 +100,21 @@ pure module subroutine backward_2d(self, previous, gradient)
             call this_layer % backward(prev_layer % output, gradient)
           type is(self_attention_layer)
             call this_layer % backward(prev_layer % output, gradient)
+          type is(fc2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+        end select
+
+      type is(fc2d_layer)
+
+        select type(prev_layer => previous % p)
+          type is(input2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(linear2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(self_attention_layer)
+            call this_layer % backward(prev_layer % output, gradient)
+          type is(fc2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
         end select
 
     end select
@@ -234,6 +254,8 @@ module subroutine forward(self, input)
             call this_layer % forward(prev_layer % output)
           type is(linear2d_layer)
             call this_layer % forward(prev_layer % output)
+          type is(fc2d_layer)
+            call this_layer % forward(prev_layer % output)
         end select
 
       type is(reshape3d_layer)
@@ -258,6 +280,8 @@ module subroutine forward(self, input)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(fc2d_layer)
+            call this_layer % forward(prev_layer % output)
         end select
 
       type is(self_attention_layer)
@@ -270,8 +294,23 @@ module subroutine forward(self, input)
             call this_layer % forward(prev_layer % output)
           type is(self_attention_layer)
             call this_layer % forward(prev_layer % output)
+          type is(fc2d_layer)
+            call this_layer % forward(prev_layer % output)
         end select
 
+      type is(fc2d_layer)
+
+        ! Upstream layers permitted: input2d, linear2d, self_attention, fc2d
+        select type(prev_layer => input % p)
+          type is(input2d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(linear2d_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(self_attention_layer)
+            call this_layer % forward(prev_layer % output)
+          type is(fc2d_layer)
+            call this_layer % forward(prev_layer % output)
+        end select
     end select
 
   end subroutine forward
@@ -311,6 +350,8 @@ pure module subroutine get_output_2d(self, output)
         allocate(output, source=this_layer % output)
       type is(self_attention_layer)
         allocate(output, source=this_layer % output)
+      type is(fc2d_layer)
+        allocate(output, source=this_layer % output)
       class default
         error stop '2-d output can only be read from an input2d or linear2d layer.'
 
@@ -367,6 +408,8 @@ impure elemental module subroutine init(self, input)
         self % layer_shape = shape(this_layer % output)
       type is(self_attention_layer)
         self % layer_shape = shape(this_layer % output)
+      type is(fc2d_layer)
+        self % layer_shape = shape(this_layer % output)
       type is(maxpool2d_layer)
         self % layer_shape = shape(this_layer % output)
     end select
@@ -425,6 +468,8 @@ elemental module function get_num_params(self) result(num_params)
         num_params = this_layer % get_num_params()
       type is (self_attention_layer)
         num_params = this_layer % get_num_params()
+      type is (fc2d_layer)
+        num_params = this_layer % get_num_params()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -458,6 +503,8 @@ module function get_params(self) result(params)
         params = this_layer % get_params()
       type is (self_attention_layer)
         params = this_layer % get_params()
+      type is (fc2d_layer)
+        params = this_layer % get_params()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -491,6 +538,8 @@ module function get_gradients(self) result(gradients)
         gradients = this_layer % get_gradients()
       type is (self_attention_layer)
         gradients = this_layer % get_gradients()
+      type is (fc2d_layer)
+        gradients = this_layer % get_gradients()
       class default
         error stop 'Unknown layer type.'
     end select
@@ -549,6 +598,9 @@ module subroutine set_params(self, params)
       type is (self_attention_layer)
         call this_layer % set_params(params)
 
+      type is (fc2d_layer)
+        call this_layer % set_params(params)
+
       type is (maxpool2d_layer)
         ! No parameters to set.
         write(stderr, '(a)') 'Warning: calling set_params() ' &
diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90
index f344c5c5..beb38722 100644
--- a/src/nf/nf_network_submodule.f90
+++ b/src/nf/nf_network_submodule.f90
@@ -11,6 +11,7 @@
   use nf_reshape_layer, only: reshape3d_layer
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
+  use nf_fc2d_layer, only: fc2d_layer
   use nf_layer, only: layer
   use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape
   use nf_loss, only: quadratic
@@ -163,6 +164,8 @@ module subroutine backward(self, output, loss)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
           type is(self_attention_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
+          type is(fc2d_layer)
+            call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
         end select
       end if
 

From 6753ef43c99f335dafb626cba8a3777e87e7f622 Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Mon, 24 Feb 2025 23:46:08 +0400
Subject: [PATCH 7/8] fc2d_layer: add example for 2d mlp

---
 example/simple_2d_mlp.f90 | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 example/simple_2d_mlp.f90

diff --git a/example/simple_2d_mlp.f90 b/example/simple_2d_mlp.f90
new file mode 100644
index 00000000..6e20e0cc
--- /dev/null
+++ b/example/simple_2d_mlp.f90
@@ -0,0 +1,35 @@
+program simple
+  use nf, only: dense, fc2d, flatten, input, network, sgd, relu, tanhf
+  implicit none
+  type(network) :: net
+  real, allocatable :: x(:, :), y(:)
+  integer, parameter :: num_iterations = 25
+  integer :: n
+
+  print '("Simple")'
+  print '(60("="))'
+  ! (4, 5) input -> fc2d with hidden size 3 and output size 2 -> flatten -> dense with 4 outputs
+  net = network([ &
+    input(4, 5), &
+    fc2d(3, 2, activation=relu()), &
+    flatten(), &
+    dense(4, activation=tanhf()) &
+  ])
+
+  call net % print_info()
+  ! One random input sample and a fixed 4-element target matching the dense(4) output
+  allocate(x(4, 5))
+  call random_number(x)
+  y = [0.123456, 0.246802, 0.9, 0.001]
+  ! Repeated forward/backward/update passes on that single sample; prediction printed every 5th pass
+  do n = 0, num_iterations  ! bounds are inclusive: num_iterations + 1 passes
+
+    call net % forward(x)
+    call net % backward(y)
+    call net % update(optimizer=sgd(learning_rate=0.05))
+
+    if (mod(n, 5) == 0) print *, n, net % predict(x)
+
+  end do
+
+end program simple

From e7251f58442383ab1b64f2a80925fd0cd6a2dc78 Mon Sep 17 00:00:00 2001
From: Mikhail Voronov <mikivo@list.ru>
Date: Tue, 25 Feb 2025 01:09:51 +0400
Subject: [PATCH 8/8] fc2d_layer: update cmake

---
 CMakeLists.txt         | 2 ++
 example/CMakeLists.txt | 1 +
 test/CMakeLists.txt    | 1 +
 3 files changed, 4 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c1bf2231..a28b102e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,8 @@ add_library(neural-fortran
   src/nf/nf_datasets_mnist_submodule.f90
   src/nf/nf_dense_layer.f90
   src/nf/nf_dense_layer_submodule.f90
+  src/nf/nf_fc2d_layer.f90
+  src/nf/nf_fc2d_layer_submodule.f90
   src/nf/nf_flatten_layer.f90
   src/nf/nf_flatten_layer_submodule.f90
   src/nf/nf_input1d_layer.f90
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index f4b706b8..fc5dc595 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -7,6 +7,7 @@ foreach(execid
   sine
   quadratic
   mha_simple
+  simple_2d_mlp
 )
   add_executable(${execid} ${execid}.f90)
   target_link_libraries(${execid} PRIVATE
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 741e9930..9f1a9142 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -12,6 +12,7 @@ foreach(execid
   insert_flatten
   reshape_layer
   multihead_attention_layer
+  fc2d_layer
   dense_network
   get_set_network_params
   conv2d_network