@@ -18,7 +18,6 @@ module function locally_connected_1d_layer_cons(filters, kernel_size, activation
     res % filters = filters
     res % activation_name = activation % get_name()
     allocate(res % activation, source=activation)
-
   end function locally_connected_1d_layer_cons
 
   module subroutine init(self, input_shape)
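For context, a minimal sketch of how this constructor would be reached. The `relu()` activation constructor and the variable names here are assumptions for illustration, not part of this diff:

```fortran
! Hypothetical usage; assumes an activation constructor such as relu()
! from the library's activation module.
type(locally_connected_1d_layer) :: lc
lc = locally_connected_1d_layer_cons(filters=32, kernel_size=3, activation=relu())
! Only filters and the activation are set here; output, kernel, and
! biases are allocated later by init() once the input shape is known.
```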
@@ -29,16 +28,14 @@ module subroutine init(self, input_shape)
     self % channels = input_shape(1)
     self % width = input_shape(2) - self % kernel_size + 1
 
-    ! Output of shape filters x width
+    ! Output of shape: filters x width
     allocate(self % output(self % filters, self % width))
     self % output = 0
 
-    ! Kernel of shape filters x channels x kernel_size
+    ! Kernel of shape: filters x channels x kernel_size
     allocate(self % kernel(self % filters, self % channels, self % kernel_size))
-
-    ! Initialize the kernel with random values with a normal distribution
     call random_normal(self % kernel)
-    self % kernel = self % kernel / self % kernel_size ** 2
+    self % kernel = self % kernel / real(self % kernel_size**2)
 
     allocate(self % biases(self % filters))
     self % biases = 0
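As a quick check of the shape arithmetic in `init` above, a self-contained sketch (values are illustrative only):

```fortran
program shape_check
  implicit none
  integer, parameter :: kernel_size = 3, width_in = 10
  integer :: width_out
  ! Same arithmetic as init(): one output per fully valid window,
  ! so a width-10 input with a size-3 kernel yields 8 output positions.
  width_out = width_in - kernel_size + 1
  print *, width_out  ! prints 8
end program shape_check
```

Each kernel weight is then drawn from a normal distribution and scaled by `1 / real(kernel_size**2)`, i.e. 1/9 in this example.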
@@ -61,113 +58,93 @@ pure module subroutine forward(self, input)
     implicit none
     class(locally_connected_1d_layer), intent(in out) :: self
     real, intent(in) :: input(:,:)
-    integer :: input_width, input_channels
-    integer :: i, n, i_out
-    integer :: iws, iwe
-    integer :: half_window
+    integer :: input_channels, input_width
+    integer :: j, n
+    integer :: iws, iwe, half_window
 
-    ! Get input dimensions
     input_channels = size(input, dim=1)
     input_width = size(input, dim=2)
-
-    ! For a kernel of odd size, half_window = kernel_size / 2 (integer division)
     half_window = self % kernel_size / 2
 
-    ! Loop over output indices rather than input indices.
-    do i_out = 1, self % width
-      ! Compute the corresponding center index in the input.
-      i = i_out + half_window
-
-      ! Define the window in the input corresponding to the filter kernel
-      iws = i - half_window
-      iwe = i + half_window
+    ! Loop over output positions.
+    do j = 1, self % width
+      ! Compute the input window corresponding to output index j.
+      ! In forward: center index = j + half_window, so window = indices j to j+kernel_size-1.
+      iws = j
+      iwe = j + self % kernel_size - 1
 
-      ! Compute the inner tensor product (sum of element-wise products)
-      ! for each filter across all channels and positions in the kernel.
-      do concurrent(n = 1:self % filters)
-        self % z(n, i_out) = sum(self % kernel(n, :, :) * input(:, iws:iwe))
+      ! For each filter, compute the convolution (inner product over channels and kernel width).
+      do concurrent(n = 1:self % filters)
+        self % z(n, j) = sum(self % kernel(n, :, :) * input(:, iws:iwe))
       end do
 
       ! Add the bias for each filter.
-      self % z(:, i_out) = self % z(:, i_out) + self % biases
+      self % z(:, j) = self % z(:, j) + self % biases
     end do
 
-    ! Apply the activation function to get the final output.
+    ! Apply the activation function.
     self % output = self % activation % eval(self % z)
   end subroutine forward
 
-
   pure module subroutine backward(self, input, gradient)
     implicit none
     class(locally_connected_1d_layer), intent(in out) :: self
-    real, intent(in) :: input(:,:)    ! shape: (channels, width)
-    real, intent(in) :: gradient(:,:) ! shape: (filters, width)
-
-    ! Local gradient arrays:
-    real :: db(self % filters)
-    real :: dw(self % filters, self % channels, self % kernel_size)
-    real :: gdz(self % filters, size(input, 2))
-
-    integer :: i, n, k
-    integer :: input_channels, input_width
-    integer :: istart, iend
-    integer :: iws, iwe
-    integer :: half_window
-
-    ! Get input dimensions.
+    ! 'input' has shape: (channels, input_width)
+    ! 'gradient' (dL/dy) has shape: (filters, output_width)
+    real, intent(in) :: input(:,:)
+    real, intent(in) :: gradient(:,:)
+
+    integer :: input_channels, input_width, output_width
+    integer :: j, n, k
+    integer :: iws, iwe, half_window
+    real :: gdz_val
+
+    ! Local arrays to accumulate gradients.
+    real :: gdz(self % filters, self % width) ! local gradient (dL/dz)
+    real :: db_local(self % filters)
+    real :: dw_local(self % filters, self % channels, self % kernel_size)
+
+    ! Determine dimensions.
     input_channels = size(input, dim=1)
     input_width = size(input, dim=2)
-
-    ! For an odd-sized kernel, half_window = kernel_size / 2.
+    output_width = self % width ! Note: output_width = input_width - kernel_size + 1
+
     half_window = self % kernel_size / 2
-
-    ! Define the valid output range so that the full input window is available.
-    istart = half_window + 1
-    iend = input_width - half_window
-
-    !----------------------------------------------------------------------
-    ! Compute the local gradient: gdz = (dL/dy) * sigma'(z)
-    ! We assume self%z stores the pre-activation values from the forward pass.
-    gdz = 0.0
-    gdz(:, istart:iend) = gradient(:, istart:iend) * self % activation % eval_prime(self % z(:, istart:iend))
-
-    !----------------------------------------------------------------------
-    ! Compute gradient with respect to biases:
-    !   dL/db(n) = sum_{i in valid range} gdz(n, i)
-    do concurrent (n = 1:self % filters)
-      db(n) = sum(gdz(n, istart:iend))
+
+    !--- Compute the local gradient gdz = (dL/dy) * sigma'(z) for each output.
+    do j = 1, output_width
+      gdz(:, j) = gradient(:, j) * self % activation % eval_prime(self % z(:, j))
     end do
-
-    ! Initialize weight gradient and input gradient accumulators.
-    dw = 0.0
-    self % gradient = 0.0 ! This array is assumed preallocated to shape (channels, width)
-
-    !----------------------------------------------------------------------
-    ! Accumulate gradients over valid output positions.
-    ! For each output position i, determine the corresponding input window indices.
-    do concurrent (n = 1:self % filters, &
-                   k = 1:self % channels, &
-                   i = istart:iend)
-      ! The input window corresponding to output index i:
-      iws = i - half_window
-      iwe = i + half_window
-
-      ! Weight gradient (dL/dw):
-      ! For each kernel element, the contribution is the product of the input in the window
-      ! and the local gradient at the output position i.
-      dw(n, k, :) = dw(n, k, :) + input(k, iws:iwe) * gdz(n, i)
-
-      ! Input gradient (dL/dx):
-      ! Distribute the effect of the output gradient back onto the input window,
-      ! weighted by the kernel weights.
-      self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, i)
+
+    !--- Compute bias gradients: db(n) = sum_j gdz(n, j)
+    do n = 1, self % filters
+      db_local(n) = sum(gdz(n, :))
     end do
-
-    !----------------------------------------------------------------------
-    ! Accumulate the computed gradients into the layer's stored gradients.
-    self % dw = self % dw + dw
-    self % db = self % db + db
-
+
+    !--- Initialize weight gradient and input gradient accumulators.
+    dw_local = 0.0
+    self % gradient = 0.0
+
+    !--- Accumulate gradients over each output position.
+    ! In the forward pass the window for output index j was:
+    !   iws = j, iwe = j + kernel_size - 1.
+    do n = 1, self % filters
+      do j = 1, output_width
+        iws = j
+        iwe = j + self % kernel_size - 1
+        do k = 1, self % channels
+          ! Weight gradient: accumulate contribution from the input window.
+          dw_local(n, k, :) = dw_local(n, k, :) + input(k, iws:iwe) * gdz(n, j)
+          ! Input gradient: propagate gradient back to the input window.
+          self % gradient(k, iws:iwe) = self % gradient(k, iws:iwe) + self % kernel(n, k, :) * gdz(n, j)
+        end do
+      end do
+    end do
+
+    !--- Update stored gradients.
+    self % dw = self % dw + dw_local
+    self % db = self % db + db_local
+
   end subroutine backward
 
   pure module function get_num_params(self) result(num_params)
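To make the window indexing shared by `forward` and `backward` concrete, here is a standalone sketch of the same valid-window scheme (array names are illustrative, not the layer's actual components):

```fortran
program window_check
  implicit none
  integer, parameter :: channels = 2, kernel_size = 3, width_in = 6
  integer, parameter :: width_out = width_in - kernel_size + 1
  real :: x(channels, width_in), w(channels, kernel_size), z(width_out)
  integer :: j

  call random_number(x)
  call random_number(w)

  ! Output position j reads input columns j .. j+kernel_size-1,
  ! exactly the iws/iwe window used in forward and backward.
  do j = 1, width_out
    z(j) = sum(w * x(:, j:j+kernel_size-1))
  end do
  print *, z
end program window_check
```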
@@ -197,11 +174,10 @@ module subroutine set_params(self, params)
     real, intent(in) :: params(:)
 
     if (size(params) /= self % get_num_params()) then
-      error stop 'locally_connected_1d % set_params: Number of parameters does not match'
+      error stop 'locally_connected_1d_layer % set_params: Number of parameters does not match'
     end if
 
     self % kernel = reshape(params(:product(shape(self % kernel))), shape(self % kernel))
-
     associate(n => product(shape(self % kernel)))
       self % biases = params(n + 1 : n + self % filters)
     end associate
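The parameter layout `set_params` consumes (flattened kernel first, then biases) can be sanity-checked with a small sketch (sizes are illustrative):

```fortran
program packing_check
  implicit none
  integer, parameter :: filters = 2, channels = 3, kernel_size = 3
  real :: kernel(filters, channels, kernel_size), biases(filters)
  real, allocatable :: params(:)

  call random_number(kernel)
  call random_number(biases)

  ! Pack in the order set_params expects: all kernel weights, then biases.
  params = [reshape(kernel, [size(kernel)]), biases]
  print *, size(params)  ! filters*channels*kernel_size + filters = 20
end program packing_check
```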