-
Notifications
You must be signed in to change notification settings - Fork 86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Batch Normalization Layer modules #157
base: main
Are you sure you want to change the base?
Changes from all commits
3b32c95
1a0ff08
e4d8e1e
42335f1
de67a88
b1e0d39
e8d040a
17b0610
7fb69f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
module nf_batchnorm_layer | ||
|
||
!! This module provides a batch normalization `batchnorm_layer` type. | ||
|
||
use nf_base_layer, only: base_layer | ||
implicit none | ||
|
||
private | ||
public :: batchnorm_layer | ||
|
||
type, extends(base_layer) :: batchnorm_layer
  !! Batch normalization layer: normalizes its input with the stored
  !! running statistics, then applies a per-feature scale and shift.

  integer :: num_features
    !! Number of features; length of every rank-1 component below
  real, allocatable, dimension(:) :: gamma, beta
    !! Per-feature scale (`gamma`) and shift (`beta`) applied to the
    !! normalized input
  real, allocatable, dimension(:) :: running_mean, running_var
    !! Per-feature mean and variance used to normalize the input
  real, allocatable, dimension(:,:) :: input, output
    !! Copy of the last forward-pass input, and the resulting output
  real, allocatable, dimension(:) :: gamma_grad, beta_grad
    !! Gradients with respect to `gamma` and `beta`, set by `backward`
  real, allocatable, dimension(:,:) :: input_grad
    !! Gradient with respect to the layer input, set by `backward`
  real :: epsilon = 1e-5
    !! Added to the variance under the square root to avoid dividing by zero

contains

  procedure :: init
  procedure :: forward, backward
  procedure :: get_num_params, get_params, get_gradients
  procedure :: set_params

end type batchnorm_layer
|
||
interface batchnorm_layer
  !! Generic constructor: `batchnorm_layer(num_features)`.
  pure module function batchnorm_layer_cons(num_features) result(res)
    !! Build a `batchnorm_layer` that handles `num_features` features.
    integer, intent(in) :: num_features
      !! Number of features of the layer
    type(batchnorm_layer) :: res
      !! The constructed layer instance
  end function batchnorm_layer_cons
end interface batchnorm_layer
|
||
interface

  module subroutine init(self, input_shape)
    !! Initialize the layer data structures.
    !!
    !! This is a deferred procedure from the `base_layer` abstract type.
    class(batchnorm_layer), intent(in out) :: self
      !! A `batchnorm_layer` instance
    integer, intent(in) :: input_shape(:)
      !! Input layer dimensions
  end subroutine init

  pure module subroutine forward(self, input)
    !! Apply a forward pass on the `batchnorm_layer`.
    !! The result is stored in the `output` component.
    class(batchnorm_layer), intent(in out) :: self
      !! A `batchnorm_layer` instance
    real, intent(in) :: input(:,:)
      !! Input data
  end subroutine forward

  pure module subroutine backward(self, input, gradient)
    !! Apply a backward pass on the `batchnorm_layer`.
    !! The results are stored in the `gamma_grad`, `beta_grad` and
    !! `input_grad` components.
    class(batchnorm_layer), intent(in out) :: self
      !! A `batchnorm_layer` instance
    real, intent(in) :: input(:,:)
      !! Input data (previous layer)
    real, intent(in) :: gradient(:,:)
      !! Gradient (next layer)
  end subroutine backward

  pure module function get_num_params(self) result(num_params)
    !! Get the number of parameters in the layer
    !! (one `gamma` and one `beta` per feature).
    class(batchnorm_layer), intent(in) :: self
      !! A `batchnorm_layer` instance
    integer :: num_params
      !! Number of parameters
  end function get_num_params

  pure module function get_params(self) result(params)
    !! Return the parameters of this layer as a flat array:
    !! all `gamma` values followed by all `beta` values.
    !! The running statistics (`running_mean`, `running_var`) are
    !! not part of the returned array.
    class(batchnorm_layer), intent(in) :: self
      !! A `batchnorm_layer` instance
    real, allocatable :: params(:)
      !! Parameters to get
  end function get_params

  pure module function get_gradients(self) result(gradients)
    !! Return the gradients of this layer as a flat array:
    !! all `gamma_grad` values followed by all `beta_grad` values.
    class(batchnorm_layer), intent(in) :: self
      !! A `batchnorm_layer` instance
    real, allocatable :: gradients(:)
      !! Gradients to get
  end function get_gradients

  module subroutine set_params(self, params)
    !! Set the parameters of the layer from a flat array laid out
    !! as `gamma` values followed by `beta` values.
    class(batchnorm_layer), intent(in out) :: self
      !! A `batchnorm_layer` instance
    real, intent(in) :: params(:)
      !! Parameters to set
  end subroutine set_params

end interface
|
||
end module nf_batchnorm_layer |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
submodule(nf_batchnorm_layer) nf_batchnorm_layer_submodule | ||
|
||
implicit none | ||
|
||
contains | ||
|
||
pure module function batchnorm_layer_cons(num_features) result(res)
  !! Construct a `batchnorm_layer` for `num_features` features.
  !!
  !! Every per-feature array gets a defined starting value:
  !! gamma = 1, beta = 0, running_mean = 0, running_var = 1,
  !! and both parameter gradients = 0.
  implicit none
  integer, intent(in) :: num_features
    !! Number of features of the layer
  type(batchnorm_layer) :: res
    !! The constructed layer instance

  res % num_features = num_features

  ! Learnable parameters and running statistics.
  allocate(res % gamma(num_features), source=1.0)
  allocate(res % beta(num_features), source=0.0)
  allocate(res % running_mean(num_features), source=0.0)
  allocate(res % running_var(num_features), source=1.0)

  ! Parameter gradients start at zero so that reading them before the
  ! first backward pass never touches undefined memory.
  allocate(res % gamma_grad(num_features), source=0.0)
  allocate(res % beta_grad(num_features), source=0.0)

  ! The batch size is not known until data arrives, so the
  ! batch-shaped buffers are created with zero width. They stay
  ! allocated (scalar assignments to them remain valid) and are
  ! resized by the whole-array assignments in the forward and
  ! backward passes.
  allocate(res % input(num_features, 0))
  allocate(res % output(num_features, 0))
  allocate(res % input_grad(num_features, 0))

end function batchnorm_layer_cons
|
||
module subroutine init(self, input_shape)
  !! Reset the layer to its initial state.
  !!
  !! All components are assumed to have been allocated by the
  !! constructor; this routine only assigns values to them.
  implicit none
  class(batchnorm_layer), intent(in out) :: self
    !! A `batchnorm_layer` instance
  integer, intent(in) :: input_shape(:)
    !! Input layer dimensions. Currently not used by this routine.
    !! NOTE(review): presumably expected to equal [num_features] - confirm.

  ! Clear the cached forward-pass data.
  self % input = 0
  self % output = 0

  ! Initialize gamma, beta, running_mean, and running_var
  self % gamma = 1.0
  self % beta = 0.0
  self % running_mean = 0.0
  self % running_var = 1.0

  ! Clear the gradients as well, so that reading them before the first
  ! backward pass yields zeros rather than undefined values.
  self % gamma_grad = 0.0
  self % beta_grad = 0.0
  self % input_grad = 0.0

end subroutine init
|
||
pure module subroutine forward(self, input)
  !! Forward pass: normalize `input` with the running statistics, then
  !! scale by `gamma` and shift by `beta`. The result is stored in
  !! `self % output`, and `input` is cached in `self % input`.
  !!
  !! `input` is taken to be laid out as (num_features, batch_size):
  !! the per-feature vectors are broadcast along dimension 2.
  implicit none
  class(batchnorm_layer), intent(in out) :: self
    !! A `batchnorm_layer` instance
  real, intent(in) :: input(:,:)
    !! Input data
  integer :: batch_size

  batch_size = size(input, dim=2)

  ! Store input for backward pass
  self % input = input

  ! Broadcast each per-feature vector across the batch with `spread`.
  ! (`reshape(v, shape(input, 1))` does not do this: the second
  ! argument of `shape` is a KIND selector, and `reshape` cannot
  ! enlarge a vector to the full input shape without a PAD.)
  associate( &
    mean => spread(self % running_mean, dim=2, ncopies=batch_size), &
    std => spread(sqrt(self % running_var + self % epsilon), &
                  dim=2, ncopies=batch_size), &
    scale_by => spread(self % gamma, dim=2, ncopies=batch_size), &
    shift_by => spread(self % beta, dim=2, ncopies=batch_size) &
  )

    ! Batch normalization forward pass
    self % output = scale_by * ((input - mean) / std) + shift_by

  end associate

end subroutine forward
|
||
pure module subroutine backward(self, input, gradient)
  !! Backward pass: compute the gradients with respect to `gamma`,
  !! `beta` and the layer input, and store them in `self % gamma_grad`,
  !! `self % beta_grad` and `self % input_grad`.
  !!
  !! The running mean and variance are treated as constants here.
  !! `input` and `gradient` are taken to be laid out as
  !! (num_features, batch_size): parameter gradients are reduced over
  !! dimension 2.
  implicit none
  class(batchnorm_layer), intent(in out) :: self
    !! A `batchnorm_layer` instance
  real, intent(in) :: input(:,:)
    !! Input data (previous layer)
  real, intent(in) :: gradient(:,:)
    !! Gradient (next layer)
  integer :: batch_size

  batch_size = size(input, dim=2)

  ! Broadcast each per-feature vector across the batch with `spread`.
  ! (`reshape(v, shape(input, 1))` does not do this: the second
  ! argument of `shape` is a KIND selector, and `reshape` cannot
  ! enlarge a vector to the full input shape without a PAD.)
  associate( &
    mean => spread(self % running_mean, dim=2, ncopies=batch_size), &
    std => spread(sqrt(self % running_var + self % epsilon), &
                  dim=2, ncopies=batch_size), &
    scale_by => spread(self % gamma, dim=2, ncopies=batch_size) &
  )

    ! Calculate gradients for gamma, beta
    self % gamma_grad = sum(gradient * (input - mean) / std, dim=2)
    self % beta_grad = sum(gradient, dim=2)

    ! Calculate gradients for input
    self % input_grad = gradient * scale_by / std

  end associate

end subroutine backward
|
||
pure module function get_num_params(self) result(num_params)
  !! Number of parameters of the layer: one `gamma` and one `beta`
  !! value per feature.
  class(batchnorm_layer), intent(in) :: self
    !! A `batchnorm_layer` instance
  integer :: num_params
    !! Number of parameters

  associate(per_vector => self % num_features)
    num_params = per_vector + per_vector
  end associate

end function get_num_params
|
||
pure module function get_params(self) result(params)
  !! Flatten the layer parameters into one array: every `gamma` value
  !! first, then every `beta` value.
  class(batchnorm_layer), intent(in) :: self
    !! A `batchnorm_layer` instance
  real, allocatable :: params(:)
    !! Parameters to get

  associate(n_gamma => size(self % gamma), n_beta => size(self % beta))
    allocate(params(n_gamma + n_beta))
    params(:n_gamma) = self % gamma
    params(n_gamma + 1:) = self % beta
  end associate

end function get_params
|
||
pure module function get_gradients(self) result(gradients)
  !! Flatten the parameter gradients into one array: every
  !! `gamma_grad` value first, then every `beta_grad` value.
  class(batchnorm_layer), intent(in) :: self
    !! A `batchnorm_layer` instance
  real, allocatable :: gradients(:)
    !! Gradients to get

  associate(n_gamma => size(self % gamma_grad), &
            n_beta => size(self % beta_grad))
    allocate(gradients(n_gamma + n_beta))
    gradients(:n_gamma) = self % gamma_grad
    gradients(n_gamma + 1:) = self % beta_grad
  end associate

end function get_gradients
|
||
module subroutine set_params(self, params)
  !! Set `gamma` and `beta` from a flat array laid out as all `gamma`
  !! values followed by all `beta` values.
  !!
  !! Stops with an error if `params` does not hold exactly
  !! `get_num_params()` values.
  class(batchnorm_layer), intent(in out) :: self
    !! A `batchnorm_layer` instance
  real, intent(in) :: params(:)
    !! Parameters to set

  ! A short array would be read out of bounds and a long one would be
  ! silently truncated, so reject any length mismatch up front.
  if (size(params) /= self % get_num_params()) then
    error stop 'Error: number of parameters does not match'
  end if

  associate(n => self % num_features)
    self % gamma = params(1:n)
    self % beta = params(n + 1:2 * n)
  end associate

end subroutine set_params
|
||
end submodule nf_batchnorm_layer_submodule |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
program test_batchnorm_layer

  !! Tests for the batch normalization layer.
  !!
  !! The generic `layer` wrapper is only checked for its name and
  !! initialization flag. The forward and backward passes are run
  !! directly on a concrete `batchnorm_layer` instance, since tying
  !! the layer in with an input layer is not supported yet.

  use iso_fortran_env, only: stderr => error_unit
  use nf, only: batchnorm, layer
  use nf_batchnorm_layer, only: batchnorm_layer

  implicit none

  type(layer) :: bn_layer
  type(batchnorm_layer) :: bn
  integer, parameter :: num_features = 64
  real, allocatable :: sample_input(:,:)
  real, allocatable :: output(:,:)
  real, allocatable :: gradient(:,:)
  integer, parameter :: input_shape(1) = [num_features]
  real, allocatable :: gamma_grad(:), beta_grad(:)
  ! A few single-precision roundings above 1 ulp of the values compared.
  real, parameter :: tolerance = 1e-6
  real :: inv_std
  logical :: ok = .true.

  bn_layer = batchnorm(num_features)

  if (.not. bn_layer % name == 'batchnorm') then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer has its name set correctly.. failed'
  end if

  if (bn_layer % initialized) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer should not be marked as initialized yet.. failed'
  end if

  ! Initialize sample input and gradient
  allocate(sample_input(num_features, 1))
  allocate(gradient(num_features, 1))
  sample_input = 1.0
  gradient = 2.0

  ! Run forward and backward passes directly on a batchnorm_layer instance.
  bn = batchnorm_layer(num_features)
  call bn % init(input_shape)

  if (bn % get_num_params() /= 2 * num_features) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer has the correct number of parameters.. failed'
  end if

  if (size(bn % get_params()) /= bn % get_num_params()) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer get_params returns all parameters.. failed'
  end if

  ! After init the layer has running_mean = 0, running_var = 1,
  ! gamma = 1 and beta = 0, so it scales its input by
  ! 1 / sqrt(1 + epsilon). That is close to, but not exactly, identity.
  inv_std = 1.0 / sqrt(1.0 + bn % epsilon)

  call bn % forward(sample_input)
  output = bn % output

  if (any(shape(output) /= shape(sample_input))) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer output has the shape of its input.. failed'
  else if (.not. all(abs(output - sample_input * inv_std) < tolerance)) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer forward pass output is incorrect.. failed'
  end if

  call bn % backward(sample_input, gradient)
  gamma_grad = bn % gamma_grad
  beta_grad = bn % beta_grad

  if (size(bn % get_gradients()) /= bn % get_num_params()) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer get_gradients returns all gradients.. failed'
  end if

  if (.not. all(abs(gamma_grad - sum(gradient * sample_input * inv_std, dim=2)) < tolerance)) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer gamma gradients are incorrect.. failed'
  end if

  if (.not. all(abs(beta_grad - sum(gradient, dim=2)) < tolerance)) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer beta gradients are incorrect.. failed'
  end if

  if (any(shape(bn % input_grad) /= shape(gradient))) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer input gradient has the shape of the gradient.. failed'
  else if (.not. all(abs(bn % input_grad - gradient * inv_std) < tolerance)) then
    ok = .false.
    write(stderr, '(a)') 'batchnorm layer input gradients are incorrect.. failed'
  end if

  ! Report test results
  if (ok) then
    print '(a)', 'test_batchnorm_layer: All tests passed.'
  else
    write(stderr, '(a)') 'test_batchnorm_layer: One or more tests failed.'
    stop 1
  end if

end program test_batchnorm_layer
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think the shape for these should be `(num_features, num_features)`, but rather `(batch_size, num_features)`. The `batch_size` also won't be known until the first forward pass, so we should defer the allocation until then. In the forward pass, we could have a simple `allocated` check to see whether they have been allocated yet and, if not, allocate them to the shape of the `input`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I meant `(num_features, batch_size)`.