Commit 6dfaed0

Authored by jvdp1 (Vandenplas, Jeremie) and milancurcic

Replacement of a matmul + use of merge (#181)

* dense_layer: replace a matmul(reshape) by a do concurrent
* nf_activation: replace some where statements by merge intrinsic
* Set correct size for self%gradient in dense_layer
* remove some unneeded pack()
* Remove notes on -fno-frontend-optimize (no longer necessary)
* Bump patch version

Co-authored-by: Vandenplas, Jeremie <[email protected]>
Co-authored-by: milancurcic <[email protected]>

1 parent c3924b5 · commit 6dfaed0

5 files changed: +23 -45 lines changed

README.md (+5 -7)

````diff
@@ -80,23 +80,21 @@ With gfortran, the following will create an optimized build of neural-fortran:
 ```
 fpm build \
   --profile release \
-  --flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
+  --flag "-I$HDF5INC -L$HDF5LIB"
 ```

 HDF5 is now a required dependency, so you have to provide it to fpm.
 The above command assumes that the `HDF5INC` and `HDF5LIB` environment
 variables are set to the include and library paths, respectively, of your
 HDF5 install.
-The `-fno-frontend-optimize` disables some optimizations that may be harmful
-when building neural-fortran.

 If you use Conda, the following instructions work:

 ```
 conda create -n nf hdf5
 conda activate nf
-fpm build --profile release --flag "-fno-frontend-optimize -I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
-fpm test --profile release --flag "-fno-frontend-optimize -I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
+fpm build --profile release --flag "-I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
+fpm test --profile release --flag "-I$CONDA_PREFIX/include -L$CONDA_PREFIX/lib -Wl,-rpath -Wl,$CONDA_PREFIX/lib"
 ```

 #### Building in parallel mode
@@ -110,15 +108,15 @@ in parallel, respectively:
 fpm build \
   --compiler caf \
   --profile release \
-  --flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
+  --flag "-I$HDF5INC -L$HDF5LIB"
 ```

 #### Testing with fpm

 ```
 fpm test \
   --profile release \
-  --flag "-fno-frontend-optimize -I$HDF5INC -L$HDF5LIB"
+  --flag "-I$HDF5INC -L$HDF5LIB"
 ```

 For the time being, you need to specify the same compiler flags to `fpm test`
````

fpm.toml (+1 -1)

```diff
@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.16.0"
+version = "0.16.1"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "[email protected]"
```

src/nf/nf_activation.f90 (+6 -30)

```diff
@@ -295,11 +295,7 @@ pure function eval_1d_relu_prime(self, x) result(res)
   class(relu), intent(in) :: self
   real, intent(in) :: x(:)
   real :: res(size(x))
-  where (x > 0)
-    res = 1
-  elsewhere
-    res = 0
-  end where
+  res = merge(1., 0., x > 0)
 end function eval_1d_relu_prime

 pure function eval_3d_relu(self, x) result(res)
@@ -315,11 +311,7 @@ pure function eval_3d_relu_prime(self, x) result(res)
   class(relu), intent(in) :: self
   real, intent(in) :: x(:,:,:)
   real :: res(size(x,1),size(x,2),size(x,3))
-  where (x > 0)
-    res = 1
-  elsewhere
-    res = 0
-  end where
+  res = merge(1., 0., x > 0)
 end function eval_3d_relu_prime

 pure function eval_1d_leaky_relu(self, x) result(res)
@@ -335,11 +327,7 @@ pure function eval_1d_leaky_relu_prime(self, x) result(res)
   class(leaky_relu), intent(in) :: self
   real, intent(in) :: x(:)
   real :: res(size(x))
-  where (x > 0)
-    res = 1
-  elsewhere
-    res = self % alpha
-  end where
+  res = merge(1., self%alpha, x > 0)
 end function eval_1d_leaky_relu_prime

 pure function eval_3d_leaky_relu(self, x) result(res)
@@ -355,11 +343,7 @@ pure function eval_3d_leaky_relu_prime(self, x) result(res)
   class(leaky_relu), intent(in) :: self
   real, intent(in) :: x(:,:,:)
   real :: res(size(x,1),size(x,2),size(x,3))
-  where (x > 0)
-    res = 1
-  elsewhere
-    res = self % alpha
-  end where
+  res = merge(1., self%alpha, x > 0)
 end function eval_3d_leaky_relu_prime

 pure function eval_1d_sigmoid(self, x) result(res)
@@ -465,11 +449,7 @@ pure function eval_1d_step(self, x) result(res)
   class(step), intent(in) :: self
   real, intent(in) :: x(:)
   real :: res(size(x))
-  where (x > 0)
-    res = 1
-  elsewhere
-    res = 0
-  end where
+  res = merge(1., 0., x > 0)
 end function eval_1d_step

 pure function eval_1d_step_prime(self, x) result(res)
@@ -485,11 +465,7 @@ pure function eval_3d_step(self, x) result(res)
   class(step), intent(in) :: self
   real, intent(in) :: x(:,:,:)
   real :: res(size(x,1),size(x,2),size(x,3))
-  where (x > 0)
-    res = 1
-  elsewhere
-    res = 0
-  end where
+  res = merge(1., 0., x > 0)
 end function eval_3d_step

 pure function eval_3d_step_prime(self, x) result(res)
```
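The `merge` intrinsic is elemental, so the single assignment `res = merge(1., 0., x > 0)` evaluates to the same array as the replaced `where`/`elsewhere` block. A minimal standalone sketch of the equivalence (the test values are illustrative, not part of the commit):

```fortran
program demo_merge
  implicit none
  real :: x(5) = [-2., -1., 0., 1., 2.]
  real :: res_where(5), res_merge(5)

  ! Old formulation: masked assignment with where/elsewhere
  where (x > 0)
    res_where = 1
  elsewhere
    res_where = 0
  end where

  ! New formulation: one elemental merge expression
  res_merge = merge(1., 0., x > 0)

  print *, all(res_where == res_merge)   ! prints T
end program demo_merge
```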

src/nf/nf_conv2d_layer_submodule.f90 (+2 -2)

```diff
@@ -195,7 +195,7 @@ pure module function get_params(self) result(params)

   params = [ &
     pack(self % kernel, .true.), &
-    pack(self % biases, .true.) &
+    self % biases &
   ]

 end function get_params
@@ -207,7 +207,7 @@ pure module function get_gradients(self) result(gradients)

   gradients = [ &
     pack(self % dw, .true.), &
-    pack(self % db, .true.) &
+    self % db &
   ]

 end function get_gradients
```
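The dropped calls are the "unneeded pack()" mentioned in the commit message: `pack(a, .true.)` applied to a rank-1 array simply returns a copy of it, so the one-dimensional `biases` and `db` can be concatenated directly, while `pack` is kept to flatten the multi-dimensional `kernel` and `dw`. A small sketch of the equivalence, with made-up shapes rather than the layer's real ones:

```fortran
program demo_pack
  implicit none
  real :: biases(3) = [0.1, 0.2, 0.3]
  real :: kernel(2,2) = reshape([1., 2., 3., 4.], [2, 2])
  real, allocatable :: params(:)

  ! pack(biases, .true.) would return biases unchanged (rank-1 input),
  ! so the bias vector is concatenated as-is after the flattened kernel.
  params = [pack(kernel, .true.), biases]

  print *, size(params)   ! prints 7 (4 kernel elements + 3 biases)
end program demo_pack
```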

src/nf/nf_dense_layer_submodule.f90 (+9 -5)

```diff
@@ -27,11 +27,15 @@ pure module subroutine backward(self, input, gradient)
   real, intent(in) :: gradient(:)
   real :: db(self % output_size)
   real :: dw(self % input_size, self % output_size)
+  integer :: i

   db = gradient * self % activation % eval_prime(self % z)
-  dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
+  ! dw = matmul(reshape(input, [size(input), 1]), reshape(db, [1, size(db)]))
+  do concurrent (i = 1:size(db))
+    self % dw(:,i) = self % dw(:,i) + input(:) * db(i)
+  enddo
   self % gradient = matmul(self % weights, db)
-  self % dw = self % dw + dw
+  ! self % dw = self % dw + dw
   self % db = self % db + db

 end subroutine backward
```
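The `do concurrent` loop accumulates the same outer product of `input` and `db` that the commented-out `matmul(reshape(...), reshape(...))` computed, but it writes directly into `self % dw` and avoids the temporary `dw` matrix. A standalone sketch of the equivalence (the sizes below are illustrative, not taken from the library):

```fortran
program demo_outer_product
  implicit none
  real :: input(3) = [1., 2., 3.]
  real :: db(2) = [10., 20.]
  real :: dw_matmul(3,2), dw_loop(3,2)
  integer :: i

  ! Old formulation: outer product via matmul of reshaped column and row vectors
  dw_matmul = matmul(reshape(input, [3, 1]), reshape(db, [1, 2]))

  ! New formulation: accumulate column by column with do concurrent
  dw_loop = 0
  do concurrent (i = 1:size(db))
    dw_loop(:,i) = dw_loop(:,i) + input(:) * db(i)
  end do

  print *, all(abs(dw_matmul - dw_loop) < 1e-6)   ! prints T
end program demo_outer_product
```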
```diff
@@ -63,7 +67,7 @@ pure module function get_params(self) result(params)

   params = [ &
     pack(self % weights, .true.), &
-    pack(self % biases, .true.) &
+    self % biases &
   ]

 end function get_params
@@ -75,7 +79,7 @@ pure module function get_gradients(self) result(gradients)

   gradients = [ &
     pack(self % dw, .true.), &
-    pack(self % db, .true.) &
+    self % db &
   ]

 end function get_gradients
@@ -135,7 +139,7 @@ module subroutine init(self, input_shape)
   allocate(self % db(self % output_size))
   self % db = 0

-  allocate(self % gradient(self % output_size))
+  allocate(self % gradient(self % input_size))
   self % gradient = 0

 end subroutine init
```
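The changed allocation follows from the shapes used in `backward` above: `self % gradient` stores the derivative with respect to the layer input, `matmul(self % weights, db)`, and with `self % weights` shaped `(input_size, output_size)` (the same shape as `dw`) and `db` of length `output_size`, that product has length `input_size`, not `output_size`. A tiny shape check, with arbitrary sizes for illustration:

```fortran
program demo_gradient_shape
  implicit none
  integer, parameter :: input_size = 4, output_size = 3
  real :: weights(input_size, output_size), db(output_size)
  real, allocatable :: gradient(:)

  call random_number(weights)
  call random_number(db)

  ! matmul of an (input_size, output_size) matrix with a length-output_size
  ! vector yields a length-input_size vector, hence the corrected allocation.
  gradient = matmul(weights, db)
  print *, size(gradient)   ! prints 4, i.e. input_size
end program demo_gradient_shape
```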
