Move optimizer from the network level to the layer level #184

Draft · wants to merge 4 commits into base: main
19 changes: 19 additions & 0 deletions src/nf/nf_dense_layer.f90
@@ -4,6 +4,7 @@ module nf_dense_layer
!! It is used internally by the layer type.
!! It is not intended to be used directly by the user.

use nf_optimizers, only: optimizer_base_type
use nf_activation, only: activation_function
use nf_base_layer, only: base_layer

@@ -28,6 +29,8 @@ module nf_dense_layer
real, allocatable :: db(:) ! bias gradients

class(activation_function), allocatable :: activation
class(optimizer_base_type), allocatable :: optimizer_1d
class(optimizer_base_type), allocatable :: optimizer_2d

contains

@@ -38,6 +41,8 @@ module nf_dense_layer
procedure :: get_params
procedure :: init
procedure :: set_params
procedure :: apply_optimizer
procedure :: set_optimizer

end type dense_layer

@@ -124,6 +129,20 @@ module subroutine init(self, input_shape)
!! Shape of the input layer
end subroutine init

module subroutine apply_optimizer(self, batch_size)
!! Update the weights and biases in place using the layer's own
!! optimizer instances, with gradients averaged over the batch.
class(dense_layer), intent(in out), target :: self
!! Dense layer instance
integer, intent(in) :: batch_size
!! Number of samples over which the gradients were accumulated
end subroutine apply_optimizer

module subroutine set_optimizer(self, optimizer)
!! Allocate and initialize this layer's optimizer instances.
!!
!! If no optimizer is passed, each instance defaults to Adam.
class(dense_layer), intent(in out) :: self
!! Dense layer instance
class(optimizer_base_type), intent(in), optional :: optimizer
!! Optimizer to copy into the layer; defaults to Adam if absent
end subroutine set_optimizer
end interface

end module nf_dense_layer
41 changes: 41 additions & 0 deletions src/nf/nf_dense_layer_submodule.f90
@@ -1,5 +1,6 @@
submodule(nf_dense_layer) nf_dense_layer_submodule

use nf_optimizers, only: adam
use nf_activation, only: activation_function
use nf_base_layer, only: base_layer
use nf_random, only: random_normal
@@ -151,4 +152,44 @@ module subroutine init(self, input_shape)

end subroutine init

module subroutine set_optimizer(self, optimizer)
class(dense_layer), intent(in out) :: self
class(optimizer_base_type), intent(in), optional :: optimizer

! Optimizers are allocated only once; an already-set optimizer is kept.
! The 1-d instance tracks the biases (output_size parameters).
if (.not. allocated(self % optimizer_1d)) then
if (present(optimizer)) then
self % optimizer_1d = optimizer
else
self % optimizer_1d = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7)
end if
call self % optimizer_1d % init(self % output_size)
end if

! The 2-d instance tracks the flattened weights; stateful optimizers
! such as Adam size their moment buffers in init, which is why biases
! and weights each need their own instance.
if (.not. allocated(self % optimizer_2d)) then
if (present(optimizer)) then
self % optimizer_2d = optimizer
else
self % optimizer_2d = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7)
end if
call self % optimizer_2d % init(self % input_size * self % output_size)
end if

end subroutine set_optimizer
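
The fallback above means a layer whose optimizer was never set silently gets Adam with the hyperparameters shown. A minimal standalone sketch of that pattern, assuming only the nf_optimizers module from this repository (the size 10 passed to init is illustrative):

program default_optimizer_demo
  use nf_optimizers, only: optimizer_base_type, adam
  implicit none
  class(optimizer_base_type), allocatable :: opt

  ! Mirrors set_optimizer: allocate a default Adam only if nothing is set.
  if (.not. allocated(opt)) then
    opt = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7)
  end if

  ! init sizes the optimizer's internal state (e.g. Adam's moment
  ! buffers) to the parameter vector it will update.
  call opt % init(10)
end program default_optimizer_demo

Because of the `.not. allocated` guards, calling set_optimizer again later is a no-op, so a layer-specific optimizer set before training survives the network-wide set_optimizers call that train makes.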

module subroutine apply_optimizer(self, batch_size)
class(dense_layer), intent(in out), target :: self
integer, intent(in) :: batch_size

real, pointer :: w_(:), dw_(:)

! Biases are already rank 1, so they can be minimized directly.
call self % optimizer_1d % minimize(self % biases, self % db / batch_size)

! Remap the rank-2 weights and gradients to rank-1 pointers so the
! optimizer, which operates on flat arrays, can update them in place.
associate(n => self % input_size * self % output_size)
w_(1:n) => self % weights
dw_(1:n) => self % dw
call self % optimizer_2d % minimize(w_, dw_ / batch_size)
end associate

end subroutine apply_optimizer

end submodule nf_dense_layer_submodule
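
The `w_(1:n) => self % weights` lines in apply_optimizer rely on Fortran 2008 pointer rank remapping: a rank-1 pointer is associated with the contiguous rank-2 weights array so a single minimize call can update all the weights in place. A self-contained sketch of just that mechanism (array shapes are illustrative):

program flatten_demo
  implicit none
  real, allocatable, target :: weights(:,:)
  real, pointer :: w_(:)

  allocate(weights(3, 4))
  weights = 1.0

  ! Rank remapping: valid because allocatable arrays are contiguous.
  w_(1:size(weights)) => weights

  ! An update through the flat view mutates weights in place.
  w_ = w_ - 0.1
  print *, weights(1, 1)  ! prints 0.9
end program flatten_demo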
12 changes: 12 additions & 0 deletions src/nf/nf_network.f90
@@ -26,6 +26,8 @@ module nf_network
procedure :: get_params
procedure :: print_info
procedure :: set_params
procedure :: apply_optimizer
procedure :: set_optimizers
procedure :: train
procedure :: update

@@ -242,6 +244,16 @@ module subroutine update(self, optimizer, batch_size)
!! Set to `size(input_data, dim=2)` for a batch gradient descent.
end subroutine update

module subroutine set_optimizers(self, optimizer)
!! Set the given optimizer on every layer that supports a
!! layer-level optimizer (currently only `dense_layer`).
class(network), intent(in out) :: self
class(optimizer_base_type), intent(in) :: optimizer
end subroutine set_optimizers

module subroutine apply_optimizer(self, batch_size)
!! Have each layer update its own parameters from its accumulated
!! gradients, averaged over `batch_size` samples.
class(network), intent(in out) :: self
integer, intent(in) :: batch_size
end subroutine apply_optimizer

end interface

end module nf_network
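
With these two entry points, an optimizer can be pinned to a network's layers before training. A usage sketch, assuming the top-level nf module re-exports the network, dense, and input constructors and the sgd optimizer type as elsewhere in this library:

program set_optimizers_demo
  use nf, only: network, dense, input, sgd
  implicit none
  type(network) :: net

  net = network([input(3), dense(5), dense(2)])

  ! Copies one sgd instance into every dense layer; each layer then
  ! allocates its own 1-d (bias) and 2-d (weight) optimizer state.
  call net % set_optimizers(sgd(learning_rate=0.01))
end program set_optimizers_demo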
45 changes: 42 additions & 3 deletions src/nf/nf_network_submodule.f90
@@ -399,6 +399,25 @@ module subroutine set_params(self, params)

end subroutine set_params

module subroutine set_optimizers(self, optimizer)
class(network), intent(in out) :: self
class(optimizer_base_type), intent(in) :: optimizer

integer :: n

do n = 1, size(self % layers)

select type (this_layer => self % layers(n) % p)

type is (dense_layer)
call this_layer % set_optimizer(optimizer)

end select

end do

end subroutine set_optimizers

module subroutine train(self, input_data, output_data, batch_size, &
epochs, optimizer, loss)
@@ -426,6 +445,8 @@ module subroutine train(self, input_data, output_data, batch_size, &

call self % optimizer % init(self % get_num_params())

! Use self % optimizer (set above) rather than the optional dummy
! argument, which may not be present.
call self % set_optimizers(self % optimizer)

! Passing the loss instance is optional.
! If not provided, we default to quadratic().
if (present(loss)) then
@@ -506,9 +527,11 @@ module subroutine update(self, optimizer, batch_size)
end select
end do

params = self % get_params()
call self % optimizer % minimize(params, self % get_gradients() / batch_size_)
call self % set_params(params)
! params = self % get_params()
! call self % optimizer % minimize(params, self % get_gradients() / batch_size_)
! call self % set_params(params)

call self % apply_optimizer(batch_size_)

! Flush network gradients to zero.
do concurrent(n = 2:size(self % layers))
@@ -524,4 +547,20 @@

end subroutine update

module subroutine apply_optimizer(self, batch_size)
class(network), intent(in out) :: self
integer, intent(in) :: batch_size

integer :: n

! A plain do loop rather than do concurrent: apply_optimizer mutates
! optimizer state and is not pure, and only pure procedures may be
! referenced inside do concurrent. Layer 1 is the input layer and has
! no parameters, so it is skipped.
do n = 2, size(self % layers)
select type (this_layer => self % layers(n) % p)
type is (dense_layer)
call this_layer % apply_optimizer(batch_size)
end select
end do

end subroutine apply_optimizer

end submodule nf_network_submodule
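
End to end, the new update path is: train calls set_optimizers once up front, and each call to update now delegates to apply_optimizer, so every dense layer minimizes its own parameters instead of the network minimizing one flat parameter vector. A toy training sketch, again assuming the public nf module; data and hyperparameters are illustrative:

program train_demo
  use nf, only: network, dense, input, sgd
  implicit none
  type(network) :: net
  real :: x(2, 100), y(1, 100)

  call random_number(x)
  y(1, :) = 0.5 * (x(1, :) + x(2, :))  ! toy regression target in [0, 1]

  net = network([input(2), dense(4), dense(1)])

  ! train copies the optimizer into each dense layer via set_optimizers;
  ! update then calls apply_optimizer instead of the commented-out
  ! network-level minimize.
  call net % train(x, y, batch_size=10, epochs=5, optimizer=sgd(learning_rate=1.))
end program train_demo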