Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Batch Normalization Layer modules #157

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ add_library(neural
src/nf.f90
src/nf/nf_activation.f90
src/nf/nf_base_layer.f90
src/nf/nf_batchnorm_layer.f90
src/nf/nf_batchnorm_layer_submodule.f90
src/nf/nf_conv2d_layer.f90
src/nf/nf_conv2d_layer_submodule.f90
src/nf/nf_datasets.f90
Expand Down
2 changes: 1 addition & 1 deletion src/nf.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module nf
use nf_datasets_mnist, only: label_digits, load_mnist
use nf_layer, only: layer
use nf_layer_constructors, only: &
conv2d, dense, flatten, input, maxpool2d, reshape
batchnorm, conv2d, dense, flatten, input, maxpool2d, reshape
use nf_network, only: network
use nf_optimizers, only: sgd, rmsprop, adam, adagrad
use nf_activation, only: activation_function, elu, exponential, &
Expand Down
109 changes: 109 additions & 0 deletions src/nf/nf_batchnorm_layer.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
module nf_batchnorm_layer

!! This module provides a batch normalization `batchnorm_layer` type.
!!
!! Only the type definition and the procedure interfaces are declared here;
!! the procedure bodies live in the `nf_batchnorm_layer_submodule` submodule.

use nf_base_layer, only: base_layer
implicit none

private
public :: batchnorm_layer

type, extends(base_layer) :: batchnorm_layer
!! Layer that shifts and scales its input per feature:
!! `output = gamma * (input - running_mean) / sqrt(running_var + epsilon) + beta`.
!!
!! NOTE(review): the 2-d arrays appear to be laid out as
!! (num_features, batch_size), since the backward pass reduces over
!! dimension 2 -- confirm.

integer :: num_features
!! Number of features; length of the per-feature 1-d arrays below
real, allocatable :: gamma(:)
!! Per-feature scale applied to the normalized input
real, allocatable :: beta(:)
!! Per-feature offset added after scaling
real, allocatable :: running_mean(:)
!! Per-feature mean subtracted from the input
real, allocatable :: running_var(:)
!! Per-feature variance used to normalize the input
real, allocatable :: input(:,:)
!! Copy of the input given to the most recent forward pass
real, allocatable :: output(:,:)
!! Result of the most recent forward pass
real, allocatable :: gamma_grad(:)
!! Gradient with respect to `gamma`, set by the backward pass
real, allocatable :: beta_grad(:)
!! Gradient with respect to `beta`, set by the backward pass
real, allocatable :: input_grad(:,:)
!! Gradient with respect to the layer input, set by the backward pass
real :: epsilon = 1e-5
!! Small constant added to `running_var` before taking the square root

contains

procedure :: forward
procedure :: backward
procedure :: get_gradients
procedure :: get_num_params
procedure :: get_params
procedure :: init
procedure :: set_params

end type batchnorm_layer

interface batchnorm_layer
pure module function batchnorm_layer_cons(num_features) result(res)
!! `batchnorm_layer` constructor function
integer, intent(in) :: num_features
!! Number of features handled by the layer
type(batchnorm_layer) :: res
!! Newly constructed `batchnorm_layer` instance
end function batchnorm_layer_cons
end interface batchnorm_layer

interface

module subroutine init(self, input_shape)
!! Initialize the layer data structures.
!!
!! This is a deferred procedure from the `base_layer` abstract type.
class(batchnorm_layer), intent(in out) :: self
!! A `batchnorm_layer` instance
integer, intent(in) :: input_shape(:)
!! Input layer dimensions
end subroutine init

pure module subroutine forward(self, input)
!! Apply a forward pass on the `batchnorm_layer`.
!!
!! Stores `input` in the layer and writes the result to `self % output`.
class(batchnorm_layer), intent(in out) :: self
!! A `batchnorm_layer` instance
real, intent(in) :: input(:,:)
!! Input data
end subroutine forward

pure module subroutine backward(self, input, gradient)
!! Apply a backward pass on the `batchnorm_layer`.
!!
!! Sets `gamma_grad`, `beta_grad` and `input_grad` on the layer.
class(batchnorm_layer), intent(in out) :: self
!! A `batchnorm_layer` instance
real, intent(in) :: input(:,:)
!! Input data (previous layer)
real, intent(in) :: gradient(:,:)
!! Gradient (next layer)
end subroutine backward

pure module function get_num_params(self) result(num_params)
!! Get the number of parameters in the layer.
class(batchnorm_layer), intent(in) :: self
!! A `batchnorm_layer` instance
integer :: num_params
!! Number of parameters (`2 * num_features`: gamma and beta)
end function get_num_params

pure module function get_params(self) result(params)
!! Return the parameters of this layer as a flat array:
!! `gamma` followed by `beta`.
!! `running_mean` and `running_var` are not included.
class(batchnorm_layer), intent(in) :: self
!! A `batchnorm_layer` instance
real, allocatable :: params(:)
!! Parameters to get
end function get_params

pure module function get_gradients(self) result(gradients)
!! Return the gradients of this layer as a flat array:
!! `gamma_grad` followed by `beta_grad`.
class(batchnorm_layer), intent(in) :: self
!! A `batchnorm_layer` instance
real, allocatable :: gradients(:)
!! Gradients to get
end function get_gradients

module subroutine set_params(self, params)
!! Set the parameters of the layer.
!!
!! The first `num_features` elements of `params` are assigned to
!! `gamma`, the next `num_features` elements to `beta`.
class(batchnorm_layer), intent(in out) :: self
!! A `batchnorm_layer` instance
real, intent(in) :: params(:)
!! Parameters to set
end subroutine set_params

end interface

end module nf_batchnorm_layer
105 changes: 105 additions & 0 deletions src/nf/nf_batchnorm_layer_submodule.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
submodule(nf_batchnorm_layer) nf_batchnorm_layer_submodule

implicit none

contains

pure module function batchnorm_layer_cons(num_features) result(res)
implicit none
integer, intent(in) :: num_features
type(batchnorm_layer) :: res

res % num_features = num_features
allocate(res % gamma(num_features), source=1.0)
allocate(res % beta(num_features))
allocate(res % running_mean(num_features), source=0.0)
allocate(res % running_var(num_features), source=1.0)
allocate(res % input(num_features, num_features))
allocate(res % output(num_features, num_features))
Comment on lines +17 to +18
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the shape for these should be (num_features, num_features), but rather (batch_size, num_features). The batch_size also won't be known until the first forward pass, so we should defer the allocation until then. In the forward pass, we could have a simple allocated check to see if they have not been allocated then, and allocate them to the shape of the input.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I meant (num_features, batch_size).

allocate(res % gamma_grad(num_features))
allocate(res % beta_grad(num_features))
allocate(res % input_grad(num_features, num_features))

end function batchnorm_layer_cons

module subroutine init(self, input_shape)
  !! Reset the layer to its starting state.
  !!
  !! `input_shape` is part of the interface but is not read here.
  implicit none
  class(batchnorm_layer), intent(in out) :: self
  integer, intent(in) :: input_shape(:)

  ! Scale and offset start out as the identity transform
  self % gamma = 1.0
  self % beta = 0.0

  ! Statistics start at zero mean and unit variance
  self % running_mean = 0.0
  self % running_var = 1.0

  ! Clear the stored input and output buffers
  self % input = 0.0
  self % output = 0.0

end subroutine init

pure module subroutine forward(self, input)
implicit none
class(batchnorm_layer), intent(in out) :: self
real, intent(in) :: input(:,:)

! Store input for backward pass
self % input = input

associate( &
! Normalize the input
normalized_input => (input - reshape(self % running_mean, shape(input, 1))) &
/ sqrt(reshape(self % running_var, shape(input, 1)) + self % epsilon) &
Comment on lines +51 to +52
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

running_mean and running_var are not yet updated anywhere, only initialized.

)

! Batch normalization forward pass
self % output = reshape(self % gamma, shape(input, 1)) * normalized_input &
+ reshape(self % beta, shape(input, 1))

end associate

end subroutine forward

pure module subroutine backward(self, input, gradient)
  !! Backward pass of the batch normalization layer.
  !!
  !! Computes, from the stored `running_mean` and `running_var`:
  !!
  !! * `gamma_grad` - sum over dimension 2 of `gradient * normalized input`
  !! * `beta_grad`  - sum over dimension 2 of `gradient`
  !! * `input_grad` - `gradient * gamma / sqrt(running_var + epsilon)`
  !!
  !! `input` and `gradient` must have the same shape, and their first
  !! dimension must match the length of the per-feature arrays.
  implicit none
  class(batchnorm_layer), intent(in out) :: self
  real, intent(in) :: input(:,:)
  real, intent(in) :: gradient(:,:)

  real, allocatable :: mean_b(:,:), std_b(:,:), gamma_b(:,:)
  integer :: batch_size

  batch_size = size(input, dim=2)

  ! Broadcast the per-feature vectors along dimension 2 with `spread`.
  ! `reshape(v, shape(input, 1))` cannot do this: the second argument of
  ! `shape` is a KIND, and `reshape` without `pad` needs a source holding
  ! at least as many elements as the target, which fails for batch_size > 1.
  mean_b = spread(self % running_mean, dim=2, ncopies=batch_size)
  std_b = spread(sqrt(self % running_var + self % epsilon), dim=2, ncopies=batch_size)
  gamma_b = spread(self % gamma, dim=2, ncopies=batch_size)

  ! Calculate gradients for gamma, beta
  self % gamma_grad = sum(gradient * (input - mean_b) / std_b, dim=2)
  self % beta_grad = sum(gradient, dim=2)

  ! Calculate gradients for input
  self % input_grad = gradient * gamma_b / std_b

end subroutine backward

pure module function get_num_params(self) result(num_params)
  !! Number of parameters held by the layer: one `gamma` and one `beta`
  !! value per feature.
  class(batchnorm_layer), intent(in) :: self
  integer :: num_params

  associate(n => self % num_features)
    num_params = n + n
  end associate
end function get_num_params

pure module function get_params(self) result(params)
  !! Flatten the layer parameters into one array: all of `gamma`,
  !! followed by all of `beta`.
  class(batchnorm_layer), intent(in) :: self
  real, allocatable :: params(:)
  integer :: n_gamma, n_beta

  n_gamma = size(self % gamma)
  n_beta = size(self % beta)

  allocate(params(n_gamma + n_beta))
  params(:n_gamma) = self % gamma
  params(n_gamma+1:) = self % beta
end function get_params

pure module function get_gradients(self) result(gradients)
  !! Flatten the layer gradients into one array: all of `gamma_grad`,
  !! followed by all of `beta_grad`.
  class(batchnorm_layer), intent(in) :: self
  real, allocatable :: gradients(:)
  integer :: n_gamma, n_beta

  n_gamma = size(self % gamma_grad)
  n_beta = size(self % beta_grad)

  allocate(gradients(n_gamma + n_beta))
  gradients(:n_gamma) = self % gamma_grad
  gradients(n_gamma+1:) = self % beta_grad
end function get_gradients

module subroutine set_params(self, params)
  !! Set the layer parameters from a flat array.
  !!
  !! The first `num_features` elements of `params` are assigned to `gamma`
  !! and the next `num_features` elements to `beta`. Any further elements
  !! are ignored. The program stops with an error if `params` holds fewer
  !! than `2 * num_features` elements.
  class(batchnorm_layer), intent(in out) :: self
  real, intent(in) :: params(:)

  associate(n => self % num_features)

    ! Guard against reading past the end of params
    if (size(params) < 2 * n) &
      error stop 'batchnorm_layer % set_params: too few parameters given'

    self % gamma = params(1:n)
    self % beta = params(n+1:2*n)

  end associate
end subroutine set_params

end submodule nf_batchnorm_layer_submodule
21 changes: 20 additions & 1 deletion src/nf/nf_layer_constructors.f90
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module nf_layer_constructors
implicit none

private
public :: conv2d, dense, flatten, input, maxpool2d, reshape
public :: batchnorm, conv2d, dense, flatten, input, maxpool2d, reshape

interface input

Expand Down Expand Up @@ -106,6 +106,25 @@ pure module function flatten() result(res)
!! Resulting layer instance
end function flatten

pure module function batchnorm(num_features) result(res)
!! Batch normalization layer constructor.
!!
!! This layer is for adding batch normalization to the network.
!! A batch normalization layer can be used after conv2d or dense layers.
!!
!! Example:
!!
!! ```
!! use nf, only: batchnorm, layer
!! type(layer) :: batchnorm_layer
!! batchnorm_layer = batchnorm(num_features = 64)
!! ```
integer, intent(in) :: num_features
!! Number of features in the layer
type(layer) :: res
!! Resulting layer instance
end function batchnorm

pure module function conv2d(filters, kernel_size, activation) result(res)
!! 2-d convolutional layer constructor.
!!
Expand Down
8 changes: 8 additions & 0 deletions src/nf/nf_layer_constructors_submodule.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
submodule(nf_layer_constructors) nf_layer_constructors_submodule

use nf_layer, only: layer
use nf_batchnorm_layer, only: batchnorm_layer
use nf_conv2d_layer, only: conv2d_layer
use nf_dense_layer, only: dense_layer
use nf_flatten_layer, only: flatten_layer
Expand All @@ -14,6 +15,13 @@

contains

pure module function batchnorm(num_features) result(res)
  !! Build a generic `layer` that wraps a new `batchnorm_layer`
  !! with `num_features` features.
  integer, intent(in) :: num_features
  type(layer) :: res

  ! Attach the concrete layer implementation, then label the wrapper
  allocate(res % p, source=batchnorm_layer(num_features))
  res % name = 'batchnorm'
end function batchnorm

pure module function conv2d(filters, kernel_size, activation) result(res)
integer, intent(in) :: filters
integer, intent(in) :: kernel_size
Expand Down
1 change: 1 addition & 0 deletions src/nf/nf_layer_submodule.f90
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
submodule(nf_layer) nf_layer_submodule

use iso_fortran_env, only: stderr => error_unit
use nf_batchnorm_layer, only: batchnorm_layer
use nf_conv2d_layer, only: conv2d_layer
use nf_dense_layer, only: dense_layer
use nf_flatten_layer, only: flatten_layer
Expand Down
2 changes: 1 addition & 1 deletion src/nf/nf_network_submodule.f90
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
use nf_io_hdf5, only: get_hdf5_dataset
use nf_keras, only: get_keras_h5_layers, keras_layer
use nf_layer, only: layer
use nf_layer_constructors, only: conv2d, dense, flatten, input, maxpool2d, reshape
use nf_layer_constructors, only: batchnorm, conv2d, dense, flatten, input, maxpool2d, reshape
use nf_loss, only: quadratic_derivative
use nf_optimizers, only: optimizer_base_type, sgd
use nf_parallel, only: tile_indices
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ foreach(execid
cnn_from_keras
conv2d_network
optimizers
batchnorm_layer
)
add_executable(test_${execid} test_${execid}.f90)
target_link_libraries(test_${execid} PRIVATE neural h5fortran::h5fortran jsonfortran::jsonfortran ${LIBS})
Expand Down
65 changes: 65 additions & 0 deletions test/test_batchnorm_layer.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
program test_batchnorm_layer

use iso_fortran_env, only: stderr => error_unit
use nf, only: batchnorm, layer
use nf_batchnorm_layer, only: batchnorm_layer

implicit none

type(layer) :: bn_layer
integer, parameter :: num_features = 64
real, allocatable :: sample_input(:,:)
real, allocatable :: output(:,:)
real, allocatable :: gradient(:,:)
integer, parameter :: input_shape(1) = [num_features]
real, allocatable :: gamma_grad(:), beta_grad(:)
real, parameter :: tolerance = 1e-7
logical :: ok = .true.

bn_layer = batchnorm(num_features)

if (.not. bn_layer % name == 'batchnorm') then
ok = .false.
write(stderr, '(a)') 'batchnorm layer has its name set correctly.. failed'
end if

if (bn_layer % initialized) then
ok = .false.
write(stderr, '(a)') 'batchnorm layer should not be marked as initialized yet.. failed'
end if

! Initialize sample input and gradient
allocate(sample_input(num_features, 1))
allocate(gradient(num_features, 1))
sample_input = 1.0
gradient = 2.0

!TODO run forward and backward passes directly on the batchnorm_layer instance
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a simple test directly on the batchnorm_layer instance rather than the high-level layer_type instance so you get the idea how it will work.

!TODO since we don't yet support tying in with the input layer.

!TODO Retrieve output and check normalization
!call bn_layer % get_output(output)
!if (.not. all(abs(output - sample_input) < tolerance)) then
! ok = .false.
! write(stderr, '(a)') 'batchnorm layer output should be close to input.. failed'
!end if

!TODO Retrieve gamma and beta gradients
!allocate(gamma_grad(num_features))
!allocate(beta_grad(num_features))
!call bn_layer % get_gradients(gamma_grad, beta_grad)

!if (.not. all(beta_grad == sum(gradient))) then
! ok = .false.
! write(stderr, '(a)') 'batchnorm layer beta gradients are incorrect.. failed'
!end if

! Report test results
if (ok) then
print '(a)', 'test_batchnorm_layer: All tests passed.'
else
write(stderr, '(a)') 'test_batchnorm_layer: One or more tests failed.'
stop 1
end if

end program test_batchnorm_layer