
Layernorm #203

Merged: 18 commits, Feb 25, 2025
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -37,6 +37,8 @@ add_library(neural-fortran
src/nf/nf_input3d_layer_submodule.f90
src/nf/nf_layer_constructors.f90
src/nf/nf_layer_constructors_submodule.f90
src/nf/nf_layernorm.f90
src/nf/nf_layernorm_submodule.f90
src/nf/nf_layer.f90
src/nf/nf_layer_submodule.f90
src/nf/nf_linear2d_layer.f90
5 changes: 3 additions & 2 deletions README.md
@@ -34,8 +34,9 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
| Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
| Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ |
Comment on lines +37 to +39

Member: @OneAdder can you please check that I did this correctly?

Collaborator (author): Yes, looks good!
| Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
| Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |

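To make the table entries above concrete, here is a minimal usage sketch that stacks the new `layernorm` layer after `self_attention`, one of its permitted upstream layers. The extents (sequence length 16, model dimension 8) are illustrative, and the two-argument form of the generic `input` constructor for a rank-2 input is an assumption, not something this diff shows.

```fortran
! Illustrative only: layernorm placed after self_attention, as permitted by the
! table above. Dimensions and the two-argument input() form are assumptions.
program layernorm_usage_sketch
  use nf, only: network, input, self_attention, layernorm
  implicit none
  type(network) :: net

  net = network([ &
    input(16, 8), &        ! rank-2 input: sequence length 16, model dimension 8
    self_attention(2), &   ! 2 attention heads
    layernorm() &          ! normalize each position over the model dimension
  ])

  call net % print_info()
end program layernorm_usage_sketch
```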
2 changes: 1 addition & 1 deletion fpm.toml
@@ -1,5 +1,5 @@
name = "neural-fortran"
version = "0.19.0"
version = "0.20.0"
license = "MIT"
author = "Milan Curcic"
maintainer = "[email protected]"
3 changes: 2 additions & 1 deletion src/nf.f90
@@ -11,7 +11,8 @@ module nf
linear2d, &
maxpool2d, &
reshape, &
self_attention
self_attention, &
layernorm
use nf_loss, only: mse, quadratic
use nf_metrics, only: corr, maxabs
use nf_network, only: network
29 changes: 19 additions & 10 deletions src/nf/nf_layer_constructors.f90
@@ -17,7 +17,8 @@ module nf_layer_constructors
linear2d, &
maxpool2d, &
reshape, &
self_attention
self_attention, &
layernorm

interface input

@@ -222,15 +223,23 @@ module function linear2d(out_features) result(res)
!! Resulting layer instance
end function linear2d

module function self_attention(num_heads) result(res)
!! Rank-2 (sequence_length, out_features) self attention constructor.
!! sequence_length and model_dimension are determined at layer initialization, based on the
!! output shape of the previous layer.
integer, intent(in) :: num_heads
!! Number of attention heads
type(layer) :: res
!! Resulting layer instance
end function self_attention
module function self_attention(num_heads) result(res)
!! Rank-2 (sequence_length, out_features) self attention constructor.
!! sequence_length and model_dimension are determined at layer initialization, based on the
!! output shape of the previous layer.
integer, intent(in) :: num_heads
!! Number of attention heads
type(layer) :: res
!! Resulting layer instance
end function self_attention

module function layernorm() result(res)
!! Layer Normalization
!! (x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta
!! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
!! https://arxiv.org/abs/1607.06450v1
type(layer) :: res
end function layernorm

end interface

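As a quick sanity check on the formula quoted in the `layernorm` docstring above, the following standalone sketch applies `(x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta` to a single feature vector. The names mirror the docstring, but this is an illustration, not the library's implementation.

```fortran
! Standalone check of (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta.
! With gamma = 1 and beta = 0 the normalized vector should have mean ~0 and variance ~1.
program layernorm_formula_demo
  implicit none
  integer, parameter :: n = 8
  real, parameter :: eps = 1e-5
  real :: x(n), gamma(n), beta(n), y(n), mu, var

  call random_number(x)
  gamma = 1
  beta = 0

  mu = sum(x) / n
  var = sum((x - mu)**2) / n
  y = (x - mu) / sqrt(var + eps) * gamma + beta

  print *, 'mean of y (expect ~0):     ', sum(y) / n
  print *, 'variance of y (expect ~1): ', sum((y - sum(y) / n)**2) / n
end program layernorm_formula_demo
```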
8 changes: 8 additions & 0 deletions src/nf/nf_layer_constructors_submodule.f90
@@ -12,6 +12,7 @@
use nf_reshape_layer, only: reshape3d_layer
use nf_linear2d_layer, only: linear2d_layer
use nf_self_attention_layer, only: self_attention_layer
use nf_layernorm_layer, only: layernorm_layer
use nf_activation, only: activation_function, relu, sigmoid

implicit none
@@ -179,4 +180,11 @@ module function self_attention(num_heads) result(res)
allocate(res % p, source=self_attention_layer(num_heads))
end function self_attention

module function layernorm() result(res)
type(layer) :: res

res % name = 'layernorm'
allocate(res % p, source=layernorm_layer())
end function layernorm

end submodule nf_layer_constructors_submodule
54 changes: 49 additions & 5 deletions src/nf/nf_layer_submodule.f90
@@ -12,6 +12,7 @@
use nf_reshape_layer, only: reshape3d_layer
use nf_linear2d_layer, only: linear2d_layer
use nf_self_attention_layer, only: self_attention_layer
use nf_layernorm_layer, only: layernorm_layer
use nf_optimizers, only: optimizer_base_type

contains
@@ -46,7 +47,7 @@ pure module subroutine backward_1d(self, previous, gradient)

type is(flatten_layer)

! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d
! Upstream layers permitted: input2d, input3d, conv2d, layernorm, maxpool2d
select type(prev_layer => previous % p)
type is(input2d_layer)
call this_layer % backward(prev_layer % output, gradient)
@@ -60,6 +61,8 @@ pure module subroutine backward_1d(self, previous, gradient)
call this_layer % backward(prev_layer % output, gradient)
type is(self_attention_layer)
call this_layer % backward(prev_layer % output, gradient)
type is(layernorm_layer)
call this_layer % backward(prev_layer % output, gradient)
end select

end select
@@ -84,6 +87,8 @@ pure module subroutine backward_2d(self, previous, gradient)
call this_layer % backward(prev_layer % output, gradient)
type is(self_attention_layer)
call this_layer % backward(prev_layer % output, gradient)
type is(layernorm_layer)
call this_layer % backward(prev_layer % output, gradient)
end select

type is(self_attention_layer)
@@ -95,8 +100,18 @@ pure module subroutine backward_2d(self, previous, gradient)
call this_layer % backward(prev_layer % output, gradient)
type is(self_attention_layer)
call this_layer % backward(prev_layer % output, gradient)
type is(layernorm_layer)
call this_layer % backward(prev_layer % output, gradient)
end select

type is(layernorm_layer)

select type(prev_layer => previous % p)
type is(linear2d_layer)
call this_layer % backward(prev_layer % output, gradient)
type is(self_attention_layer)
call this_layer % backward(prev_layer % output, gradient)
end select
end select

end subroutine backward_2d
@@ -234,6 +249,8 @@ module subroutine forward(self, input)
call this_layer % forward(prev_layer % output)
type is(linear2d_layer)
call this_layer % forward(prev_layer % output)
type is(layernorm_layer)
call this_layer % forward(prev_layer % output)
end select

type is(reshape3d_layer)
@@ -250,26 +267,40 @@ module subroutine forward(self, input)

type is(linear2d_layer)

! Upstream layers permitted: input2d, linear2d
! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
select type(prev_layer => input % p)
type is(input2d_layer)
call this_layer % forward(prev_layer % output)
type is(linear2d_layer)
call this_layer % forward(prev_layer % output)
type is(self_attention_layer)
call this_layer % forward(prev_layer % output)
type is(layernorm_layer)
call this_layer % forward(prev_layer % output)
end select

type is(self_attention_layer)

! Upstream layers permitted: input2d, linear2d
! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
select type(prev_layer => input % p)
type is(input2d_layer)
call this_layer % forward(prev_layer % output)
type is(linear2d_layer)
call this_layer % forward(prev_layer % output)
type is(self_attention_layer)
call this_layer % forward(prev_layer % output)
type is(layernorm_layer)
call this_layer % forward(prev_layer % output)
end select

type is(layernorm_layer)

! Upstream layers permitted: linear2d, self_attention
select type(prev_layer => input % p)
type is(linear2d_layer)
call this_layer % forward(prev_layer % output)
type is(self_attention_layer)
call this_layer % forward(prev_layer % output)
end select

end select
@@ -311,6 +342,8 @@ pure module subroutine get_output_2d(self, output)
allocate(output, source=this_layer % output)
type is(self_attention_layer)
allocate(output, source=this_layer % output)
type is(layernorm_layer)
allocate(output, source=this_layer % output)
class default
error stop '2-d output can only be read from an input2d or linear2d layer.'

@@ -354,8 +387,8 @@ impure elemental module subroutine init(self, input)
call this_layer % init(input % layer_shape)
end select

! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
! self_attention layers is not known until we receive an input layer.
! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
! self_attention or layernorm layers is not known until we receive an input layer.
select type(this_layer => self % p)
type is(conv2d_layer)
self % layer_shape = shape(this_layer % output)
Expand All @@ -367,6 +400,8 @@ impure elemental module subroutine init(self, input)
self % layer_shape = shape(this_layer % output)
type is(self_attention_layer)
self % layer_shape = shape(this_layer % output)
type is(layernorm_layer)
self % layer_shape = shape(this_layer % output)
type is(maxpool2d_layer)
self % layer_shape = shape(this_layer % output)
end select
@@ -425,6 +460,8 @@ elemental module function get_num_params(self) result(num_params)
num_params = this_layer % get_num_params()
type is (self_attention_layer)
num_params = this_layer % get_num_params()
type is (layernorm_layer)
num_params = this_layer % get_num_params()
class default
error stop 'Unknown layer type.'
end select
@@ -458,6 +495,8 @@ module function get_params(self) result(params)
params = this_layer % get_params()
type is (self_attention_layer)
params = this_layer % get_params()
type is (layernorm_layer)
params = this_layer % get_params()
class default
error stop 'Unknown layer type.'
end select
@@ -491,6 +530,8 @@ module function get_gradients(self) result(gradients)
gradients = this_layer % get_gradients()
type is (self_attention_layer)
gradients = this_layer % get_gradients()
type is (layernorm_layer)
gradients = this_layer % get_gradients()
class default
error stop 'Unknown layer type.'
end select
@@ -549,6 +590,9 @@ module subroutine set_params(self, params)
type is (self_attention_layer)
call this_layer % set_params(params)

type is (layernorm_layer)
call this_layer % set_params(params)

type is (maxpool2d_layer)
! No parameters to set.
write(stderr, '(a)') 'Warning: calling set_params() ' &
92 changes: 92 additions & 0 deletions src/nf/nf_layernorm.f90
@@ -0,0 +1,92 @@
module nf_layernorm_layer
use nf_activation, only: activation_function
use nf_base_layer, only: base_layer

implicit none

private
public :: layernorm_layer

type, extends(base_layer) :: layernorm_layer
!! Layer Normalization
!! (x − mean(x)) / sqrt(variance(x) + eps) * gamma + beta
!! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
!! https://arxiv.org/abs/1607.06450v1
integer :: sequence_length
integer :: model_dimension

real :: eps
real, allocatable :: gamma(:)
real, allocatable :: beta(:)

real, allocatable :: d_gamma(:)
real, allocatable :: d_beta(:)
real, allocatable :: gradient(:, :)

real, allocatable :: mu(:, :)
real, allocatable :: sigma(:)

real, allocatable :: output(:, :)

! temp storages
real, allocatable, private :: normalized(:, :)
real, allocatable, private :: one_over_sigma(:, :)
real, allocatable, private :: gradient_by_gamma_over_sigma(:, :)
contains
procedure :: forward
procedure :: backward
procedure :: init
procedure :: get_num_params
procedure :: get_params
procedure :: get_gradients
procedure :: set_params
end type layernorm_layer

interface layernorm_layer
module function layernorm_layer_cons() &
result(res)
type(layernorm_layer) :: res
end function layernorm_layer_cons
end interface layernorm_layer

interface
pure module subroutine forward(self, input)
class(layernorm_layer), intent(in out) :: self
real, intent(in) :: input(:, :)
end subroutine forward

pure module subroutine backward(self, input, gradient)
class(layernorm_layer), intent(in out) :: self
real, intent(in) :: input(:, :)
real, intent(in) :: gradient(:, :)
end subroutine backward

module subroutine init(self, input_shape)
class(layernorm_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)
end subroutine init

pure module function get_num_params(self) result(num_params)
class(layernorm_layer), intent(in) :: self
integer :: num_params
end function get_num_params


module function get_params(self) result(params)
class(layernorm_layer), intent(in), target :: self
real, allocatable :: params(:)
end function get_params


module function get_gradients(self) result(gradients)
class(layernorm_layer), intent(in), target :: self
real, allocatable :: gradients(:)
end function get_gradients


module subroutine set_params(self, params)
class(layernorm_layer), intent(in out) :: self
real, intent(in), target :: params(:)
end subroutine set_params
end interface
end module nf_layernorm_layer
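For readers who want to poke at the low-level type directly, here is a hedged sketch that drives `layernorm_layer` through the interfaces declared above. It assumes `init` receives the `(sequence_length, model_dimension)` shape of the incoming output and must be called before `forward`, and that `get_num_params` counts `gamma` plus `beta`; neither assumption is spelled out in this diff.

```fortran
! Sketch only: exercises the layernorm_layer interfaces declared above.
! Assumes init takes the (sequence_length, model_dimension) shape and allocates
! gamma, beta, and the temporaries before forward is called.
program layernorm_module_sketch
  use nf_layernorm_layer, only: layernorm_layer
  implicit none
  type(layernorm_layer) :: norm
  real :: x(4, 8)   ! (sequence_length, model_dimension)

  call random_number(x)

  norm = layernorm_layer()
  call norm % init([4, 8])
  call norm % forward(x)

  print *, 'output shape:         ', shape(norm % output)
  print *, 'trainable parameters: ', norm % get_num_params()
end program layernorm_module_sketch
```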