diff --git a/README.md b/README.md
index c33f7932..20d34f61 100644
--- a/README.md
+++ b/README.md
@@ -29,13 +29,29 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 |------------|------------------|------------------------|----------------------|--------------|---------------|
 | Input | `input` | n/a | 1, 3 | n/a | n/a |
 | Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ |
-| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ❌ |
+| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
 | Flatten | `flatten` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |
 
-**Note:** The training of convolutional layers has been discovered to be broken
-as of release 0.13.0. This will be fixed in a future (hopefully next) release.
+(*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset.
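+
+A minimal sketch of a valid layer stack, following the allowed-input rules in the table above (the program name and hyperparameters are illustrative only):
+
+```fortran
+program cnn_example
+  use nf, only: conv2d, dense, flatten, input, maxpool2d, network
+  implicit none
+  type(network) :: net
+  net = network([ &
+    input([3, 32, 32]), &                 ! 3-d input (channels, height, width)
+    conv2d(filters=8, kernel_size=3), &   ! defaults to relu activation
+    maxpool2d(pool_size=2), &
+    flatten(), &
+    dense(10) &
+  ])
+end program cnn_example
+```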
 
 ## Getting started
 
diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90
index 002a83ba..91de36ed 100644
--- a/src/nf/nf_layer_constructors_submodule.f90
+++ b/src/nf/nf_layer_constructors_submodule.f90
@@ -8,7 +8,7 @@
   use nf_input3d_layer, only: input3d_layer
   use nf_maxpool2d_layer, only: maxpool2d_layer
   use nf_reshape_layer, only: reshape3d_layer
-  use nf_activation, only: activation_function, sigmoid
+  use nf_activation, only: activation_function, relu, sigmoid
 
   implicit none
 
@@ -27,7 +27,8 @@ pure module function conv2d(filters, kernel_size, activation) result(res)
     if (present(activation)) then
       allocate(activation_tmp, source=activation)
     else
-      allocate(activation_tmp, source=sigmoid())
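+      ! No activation was passed; default to relu (previously sigmoid)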
+      allocate(activation_tmp, source=relu())
     end if
 
     res % activation = activation_tmp % get_name()
diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90
index 5bafb7cf..a35064bd 100644
--- a/src/nf/nf_network_submodule.f90
+++ b/src/nf/nf_network_submodule.f90
@@ -305,12 +305,15 @@ pure module subroutine backward(self, output)
         select type(next_layer => self % layers(n + 1) % p)
           type is(dense_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
-          type is(flatten_layer)
-            call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
           type is(conv2d_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
+          type is(flatten_layer)
+            call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
           type is(maxpool2d_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
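+          ! Propagate gradients through reshape3d layers as well (previously unhandled here)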
+          type is(reshape3d_layer)
+            call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
         end select
       end if
 
diff --git a/test/test_conv2d_layer.f90 b/test/test_conv2d_layer.f90
index ae0474ee..5840063b 100644
--- a/test/test_conv2d_layer.f90
+++ b/test/test_conv2d_layer.f90
@@ -24,9 +24,9 @@ program test_conv2d_layer
     write(stderr, '(a)') 'conv2d layer should not be marked as initialized yet.. failed'
   end if
 
-  if (.not. conv_layer % activation == 'sigmoid') then
+  if (.not. conv_layer % activation == 'relu') then
     ok = .false.
-    write(stderr, '(a)') 'conv2d layer is defaults to sigmoid activation.. failed'
+    write(stderr, '(a)') 'conv2d layer defaults to relu activation.. failed'
   end if
 
   input_layer = input([3, 32, 32])
@@ -62,7 +62,7 @@ program test_conv2d_layer
   call conv_layer % forward(input_layer)
   call conv_layer % get_output(output)
 
-  if (.not. all(abs(output - 0.5) < tolerance)) then
+  if (.not. all(abs(output) < tolerance)) then
     ok = .false.
-    write(stderr, '(a)') 'conv2d layer with zero input and sigmoid function must forward to all 0.5.. failed'
+    write(stderr, '(a)') 'conv2d layer with zero input and relu activation must forward to all zeros.. failed'
   end if
diff --git a/test/test_conv2d_network.f90 b/test/test_conv2d_network.f90
index 4d87a520..47c9a819 100644
--- a/test/test_conv2d_network.f90
+++ b/test/test_conv2d_network.f90
@@ -1,7 +1,7 @@
 program test_conv2d_network
 
   use iso_fortran_env, only: stderr => error_unit
-  use nf, only: conv2d, input, network
+  use nf, only: conv2d, input, network, dense, sgd, maxpool2d
 
   implicit none
 
@@ -21,6 +21,7 @@ program test_conv2d_network
     ok = .false.
   end if
 
+  ! Test that the network output has the expected shape
   allocate(sample_input(3, 32, 32))
   sample_input = 0
 
@@ -32,6 +33,117 @@
     ok = .false.
   end if
 
+  deallocate(sample_input, output)
+
+  training1: block
+
+    type(network) :: cnn
+    real :: y(1)
+    real :: tolerance = 1e-5
+    integer :: n
+    integer, parameter :: num_iterations = 1000
+
+    ! Test training of a minimal constant mapping
+    allocate(sample_input(1, 5, 5))
+    call random_number(sample_input)
+
+    cnn = network([ &
+      input(shape(sample_input)), &
+      conv2d(filters=1, kernel_size=3), & ! 1x5x5 input, 1x3x3 output
+      conv2d(filters=1, kernel_size=3), & ! 1x3x3 input, 1x1x1 output
+      dense(1) &
+    ])
+
+    y = [0.1234567]
+
+    do n = 1, num_iterations
+      call cnn % forward(sample_input)
+      call cnn % backward(y)
+      call cnn % update(optimizer=sgd(learning_rate=1.))
+      if (all(abs(cnn % predict(sample_input) - y) < tolerance)) exit
+    end do
+
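+    ! If the loop ran to completion, n == num_iterations + 1 and training did not converge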
+    if (.not. n <= num_iterations) then
+      write(stderr, '(a)') &
+        'convolutional network 1 should converge in simple training.. failed'
+      ok = .false.
+    end if
+
+  end block training1
+
+  training2: block
+
+    type(network) :: cnn
+    real :: x(1, 8, 8)
+    real :: y(1)
+    real :: tolerance = 1e-5
+    integer :: n
+    integer, parameter :: num_iterations = 1000
+
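+    ! Test training of a small CNN with a maxpool2d layer between the conv2d layers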
+    call random_number(x)
+    y = [0.1234567]
+
+    cnn = network([ &
+      input(shape(x)), &
+      conv2d(filters=1, kernel_size=3), & ! 1x8x8 input, 1x6x6 output
+      maxpool2d(pool_size=2), &           ! 1x6x6 input, 1x3x3 output
+      conv2d(filters=1, kernel_size=3), & ! 1x3x3 input, 1x1x1 output
+      dense(1) &
+    ])
+
+    do n = 1, num_iterations
+      call cnn % forward(x)
+      call cnn % backward(y)
+      call cnn % update(optimizer=sgd(learning_rate=1.))
+      if (all(abs(cnn % predict(x) - y) < tolerance)) exit
+    end do
+
+    if (.not. n <= num_iterations) then
+      write(stderr, '(a)') &
+        'convolutional network 2 should converge in simple training.. failed'
+      ok = .false.
+    end if
+
+  end block training2
+
+  training3: block
+
+    type(network) :: cnn
+    real :: x(1, 12, 12)
+    real :: y(9)
+    real :: tolerance = 1e-5
+    integer :: n
+    integer, parameter :: num_iterations = 5000
+
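+    ! Test training of a deeper CNN that maps to a 9-element output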
+    call random_number(x)
+    y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789, 0.67890, 0.78901, 0.89012, 0.90123]
+
+    cnn = network([ &
+      input(shape(x)), &
+      conv2d(filters=1, kernel_size=3), & ! 1x12x12 input, 1x10x10 output
+      maxpool2d(pool_size=2), &           ! 1x10x10 input, 1x5x5 output
+      conv2d(filters=1, kernel_size=3), & ! 1x5x5 input, 1x3x3 output
+      dense(9) &                          ! 9 outputs
+    ])
+
+    do n = 1, num_iterations
+      call cnn % forward(x)
+      call cnn % backward(y)
+      call cnn % update(optimizer=sgd(learning_rate=1.))
+      if (all(abs(cnn % predict(x) - y) < tolerance)) exit
+    end do
+
+    if (.not. n <= num_iterations) then
+      write(stderr, '(a)') &
+        'convolutional network 3 should converge in simple training.. failed'
+      ok = .false.
+    end if
+
+  end block training3
+
   if (ok) then
     print '(a)', 'test_conv2d_network: All tests passed.'
   else