SGD optimizer stub (#139)
* Define the SGD minimization step in the optimizer type

* Add note about refactor needed

* Pass optimizer instance down to layer % update()

* Apply the optimizer update step in layer % update

* Update tests and examples to account for the API change in network % update()

* Make optimizer optional; default to SGD with learning rate of 1

* Apply optimizer to conv2d layer
milancurcic authored Jun 22, 2023
1 parent 44833c2 commit 31fc061
Showing 14 changed files with 138 additions and 81 deletions.
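
The nf_optimizers module itself is not part of this diff, so the shape of the new sgd type can only be inferred from how it is called below (sgd(learning_rate=...), optimizer % minimize(...)). For orientation only, here is a minimal sketch of what such a type could look like; the minimize signature, the rank-1 array arguments, and the default learning rate of 1 are assumptions, and the real module would also need to handle the rank-2 weight and rank-4 kernel arrays updated in nf_layer_submodule.f90, for example by flattening them before the call.

module nf_optimizers_sketch
  !! Hypothetical sketch of an SGD optimizer type, inferred from the calls
  !! in this commit; not the actual nf_optimizers module.
  implicit none
  private
  public :: optimizer_base_type, sgd

  type, abstract :: optimizer_base_type
    real :: learning_rate = 1.  ! assumed default ("SGD with learning rate of 1")
  contains
    procedure(minimize_interface), deferred :: minimize
  end type optimizer_base_type

  abstract interface
    pure subroutine minimize_interface(self, param, gradient)
      import :: optimizer_base_type
      class(optimizer_base_type), intent(in) :: self
      real, intent(in out) :: param(:)
      real, intent(in) :: gradient(:)
    end subroutine minimize_interface
  end interface

  type, extends(optimizer_base_type) :: sgd
  contains
    procedure :: minimize => sgd_minimize
  end type sgd

contains

  pure subroutine sgd_minimize(self, param, gradient)
    !! Plain gradient-descent step: param <- param - learning_rate * gradient
    class(sgd), intent(in) :: self
    real, intent(in out) :: param(:)
    real, intent(in) :: gradient(:)
    param = param - self % learning_rate * gradient
  end subroutine sgd_minimize

end module nf_optimizers_sketch
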
3 changes: 2 additions & 1 deletion example/get_set_network_params.f90
@@ -1,5 +1,6 @@
program get_set_network_params
use nf, only: dense, input, network
use nf_optimizers, only: sgd
implicit none
type(network) :: net1, net2
real :: x(1), y(1)
@@ -37,7 +38,7 @@ program get_set_network_params

call net1 % forward(x)
call net1 % backward(y)
call net1 % update(1.)
call net1 % update(sgd(learning_rate=1.))

if (mod(n, 10000) == 0) then
ypred1 = [(net1 % predict([xtest(i)]), i=1, test_size)]
7 changes: 4 additions & 3 deletions example/quadratic.f90
@@ -4,6 +4,7 @@ program quadratic_fit
! descent.
use nf, only: dense, input, network
use nf_dense_layer, only: dense_layer
use nf_optimizers, only: sgd

implicit none
type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd, net_rms_prop
@@ -97,7 +98,7 @@ subroutine sgd_optimizer(net, x, y, learning_rate, num_epochs)
do i = 1, size(x)
call net % forward([x(i)])
call net % backward([y(i)])
call net % update(learning_rate)
call net % update(sgd(learning_rate=learning_rate))
end do
end do

@@ -120,7 +121,7 @@ subroutine batch_gd_optimizer(net, x, y, learning_rate, num_epochs)
call net % forward([x(i)])
call net % backward([y(i)])
end do
call net % update(learning_rate / size(x))
call net % update(sgd(learning_rate=learning_rate / size(x)))
end do

end subroutine batch_gd_optimizer
@@ -164,7 +165,7 @@ subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_size)
call net % backward([y(i)])
end do

call net % update(learning_rate / batch_size)
call net % update(sgd(learning_rate=learning_rate / batch_size))
end do
end do
end subroutine minibatch_gd_optimizer
2 changes: 1 addition & 1 deletion example/simple.f90
@@ -24,7 +24,7 @@ program simple

call net % forward(x)
call net % backward(y)
call net % update(1.)
call net % update()

if (mod(n, 50) == 0) &
print '(i4,2(3x,f8.6))', n, net % predict(x)
2 changes: 1 addition & 1 deletion example/sine.f90
@@ -31,7 +31,7 @@ program sine

call net % forward(x)
call net % backward(y)
call net % update(1.)
call net % update()

if (mod(n, 10000) == 0) then
ypred = [(net % predict([xtest(i)]), i = 1, test_size)]
9 changes: 0 additions & 9 deletions src/nf/nf_conv2d_layer.f90
@@ -36,7 +36,6 @@ module nf_conv2d_layer
procedure :: get_num_params
procedure :: get_params
procedure :: set_params
procedure :: update

end type conv2d_layer

@@ -105,14 +104,6 @@ module subroutine set_params(self, params)
!! Parameters to set
end subroutine set_params

module subroutine update(self, learning_rate)
!! Update the weights and biases.
class(conv2d_layer), intent(in out) :: self
!! Dense layer instance
real, intent(in) :: learning_rate
!! Learning rate (must be > 0)
end subroutine update

end interface

end module nf_conv2d_layer
16 changes: 0 additions & 16 deletions src/nf/nf_conv2d_layer_submodule.f90
@@ -225,20 +225,4 @@ module subroutine set_params(self, params)

end subroutine set_params


module subroutine update(self, learning_rate)
class(conv2d_layer), intent(in out) :: self
real, intent(in) :: learning_rate

! Sum weight and bias gradients across images, if any
call co_sum(self % dw)
call co_sum(self % db)

self % kernel = self % kernel - learning_rate * self % dw
self % biases = self % biases - learning_rate * self % db
self % dw = 0
self % db = 0

end subroutine update

end submodule nf_conv2d_layer_submodule
9 changes: 0 additions & 9 deletions src/nf/nf_dense_layer.f90
@@ -37,7 +37,6 @@ module nf_dense_layer
procedure :: get_params
procedure :: set_params
procedure :: init
procedure :: update

end type dense_layer

@@ -115,14 +114,6 @@ module subroutine init(self, input_shape)
!! Shape of the input layer
end subroutine init

module subroutine update(self, learning_rate)
!! Update the weights and biases.
class(dense_layer), intent(in out) :: self
!! Dense layer instance
real, intent(in) :: learning_rate
!! Learning rate (must be > 0)
end subroutine update

end interface

end module nf_dense_layer
15 changes: 0 additions & 15 deletions src/nf/nf_dense_layer_submodule.f90
@@ -128,19 +128,4 @@ module subroutine init(self, input_shape)

end subroutine init

module subroutine update(self, learning_rate)
class(dense_layer), intent(in out) :: self
real, intent(in) :: learning_rate

! Sum weight and bias gradients across images, if any
call co_sum(self % dw)
call co_sum(self % db)

self % weights = self % weights - learning_rate * self % dw
self % biases = self % biases - learning_rate * self % db
self % dw = 0
self % db = 0

end subroutine update

end submodule nf_dense_layer_submodule
9 changes: 6 additions & 3 deletions src/nf/nf_layer.f90
@@ -4,6 +4,7 @@ module nf_layer
!! user-facing API.

use nf_base_layer, only: base_layer
use nf_optimizers, only: optimizer_base_type

implicit none

@@ -144,16 +145,18 @@ module subroutine set_params(self, params)
!! Parameters of this layer
end subroutine set_params

impure elemental module subroutine update(self, learning_rate)
impure elemental module subroutine update(self, optimizer, batch_size)
!! Update the weights and biases on the layer using the stored
!! gradients (from backward passes), and flush those same stored
!! gradients to zero.
!! This changes the state of the layer.
!! Typically used only internally from the `network % update` method.
class(layer), intent(in out) :: self
!! Layer instance
real, intent(in) :: learning_rate
!! Learning rate to use; must be > 0.
class(optimizer_base_type), intent(in) :: optimizer
!! Optimizer instance to use
integer, intent(in), optional :: batch_size
!! Batch size (default 1)
end subroutine update

end interface
54 changes: 47 additions & 7 deletions src/nf/nf_layer_submodule.f90
@@ -8,6 +8,7 @@
use nf_input3d_layer, only: input3d_layer
use nf_maxpool2d_layer, only: maxpool2d_layer
use nf_reshape_layer, only: reshape3d_layer
use nf_optimizers, only: optimizer_base_type

contains

@@ -382,15 +383,54 @@ module subroutine set_params(self, params)
end subroutine set_params


impure elemental module subroutine update(self, learning_rate)
impure elemental module subroutine update(self, optimizer, batch_size)
class(layer), intent(in out) :: self
real, intent(in) :: learning_rate
class(optimizer_base_type), intent(in) :: optimizer
integer, intent(in), optional :: batch_size
integer :: batch_size_

batch_size_ = 1
if (present(batch_size)) batch_size_ = batch_size

select type (this_layer => self % p)
type is (dense_layer)

! Sum weight and bias gradients across images, if any
call co_sum(this_layer % dw)
call co_sum(this_layer % db)

call optimizer % minimize( &
this_layer % weights, &
this_layer % dw / batch_size_ &
)
call optimizer % minimize( &
this_layer % biases, &
this_layer % db / batch_size_ &
)

! Reset gradients.
this_layer % dw = 0
this_layer % db = 0

type is (conv2d_layer)

! Sum weight and bias gradients across images, if any
call co_sum(this_layer % dw)
call co_sum(this_layer % db)

call optimizer % minimize( &
this_layer % kernel, &
this_layer % dw / batch_size_ &
)
call optimizer % minimize( &
this_layer % biases, &
this_layer % db / batch_size_ &
)

! Reset gradients.
this_layer % dw = 0
this_layer % db = 0

select type(this_layer => self % p)
type is(dense_layer)
call this_layer % update(learning_rate)
type is(conv2d_layer)
call this_layer % update(learning_rate)
end select

end subroutine update
15 changes: 9 additions & 6 deletions src/nf/nf_network.f90
@@ -193,12 +193,11 @@ module subroutine train(self, input_data, output_data, batch_size, &
!! Set to `size(input_data, dim=2)` for a batch gradient descent.
integer, intent(in) :: epochs
!! Number of epochs to run
class(optimizer_base_type), intent(in) :: optimizer
!! Optimizer instance; currently this is an `sgd` optimizer type
!! and it will be made to be a more general optimizer type.
class(optimizer_base_type), intent(in), optional :: optimizer
!! Optimizer instance to use. If not provided, the default is sgd().
end subroutine train

module subroutine update(self, learning_rate)
module subroutine update(self, optimizer, batch_size)
!! Update the weights and biases on all layers using the stored
!! gradients (from backward passes) on those layers, and flush those
!! same stored gradients to zero.
Expand All @@ -207,8 +206,12 @@ module subroutine update(self, learning_rate)
!! but can be invoked by the user when creating custom optimizers.
class(network), intent(in out) :: self
!! Network instance
real, intent(in) :: learning_rate
!! Learning rate to use; must be > 0.
class(optimizer_base_type), intent(in), optional :: optimizer
!! Optimizer instance to use
integer, intent(in), optional :: batch_size
!! Batch size to use.
!! Set to 1 for a pure stochastic gradient descent (default).
!! Set to `size(input_data, dim=2)` for a batch gradient descent.
end subroutine update

end interface
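
The new optional batch_size argument lets update() average the accumulated gradients itself, instead of callers folding the batch size into the learning rate as the quadratic.f90 example above still does. A hypothetical mini-batch loop using it might look like this (the array names and the learning rate are illustrative, not from this commit):

do i = 1, batch_size
  call net % forward(x_batch(:,i))
  call net % backward(y_batch(:,i))
end do
! Gradients accumulated over the batch are divided by batch_size inside update().
call net % update(sgd(learning_rate=0.1), batch_size=batch_size)
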
33 changes: 27 additions & 6 deletions src/nf/nf_network_submodule.f90
@@ -520,14 +520,23 @@ module subroutine train(self, input_data, output_data, batch_size, &
real, intent(in) :: output_data(:,:)
integer, intent(in) :: batch_size
integer, intent(in) :: epochs
class(optimizer_base_type), intent(in) :: optimizer
class(optimizer_base_type), intent(in), optional :: optimizer
class(optimizer_base_type), allocatable :: optimizer_

real :: pos
integer :: dataset_size
integer :: batch_start, batch_end
integer :: i, j, n
integer :: istart, iend, indices(2)

! Passing the optimizer instance is optional.
! If not provided, we default to SGD with its default settings.
if (present(optimizer)) then
optimizer_ = optimizer
else
optimizer_ = sgd()
end if

dataset_size = size(output_data, dim=2)

epoch_loop: do n = 1, epochs
@@ -552,9 +561,9 @@ module subroutine train(self, input_data, output_data, batch_size, &
call self % backward(output_data(:,j))
end do

select type (optimizer)
select type (optimizer_)
type is (sgd)
call self % update(optimizer % learning_rate / batch_size)
call self % update(optimizer_, batch_size)
class default
error stop 'Unsupported optimizer'
end select
Expand All @@ -565,10 +574,22 @@ module subroutine train(self, input_data, output_data, batch_size, &
end subroutine train


module subroutine update(self, learning_rate)
module subroutine update(self, optimizer, batch_size)
class(network), intent(in out) :: self
real, intent(in) :: learning_rate
call self % layers % update(learning_rate)
class(optimizer_base_type), intent(in), optional :: optimizer
integer, intent(in), optional :: batch_size
class(optimizer_base_type), allocatable :: optimizer_

! Passing the optimizer instance is optional.
! If not provided, we default to SGD with its default settings.
if (present(optimizer)) then
optimizer_ = optimizer
else
optimizer_ = sgd()
end if

call self % layers % update(optimizer_, batch_size)

end subroutine update

end submodule nf_network_submodule
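
Putting it together, a user-side training loop after this change follows the pattern already visible in the examples above: pass an sgd instance explicitly, or omit it to fall back to the default. The standalone sketch below mirrors example/get_set_network_params.f90; the layer sizes, data, and learning rate are made up for illustration.

program sgd_update_demo
  use nf, only: dense, input, network
  use nf_optimizers, only: sgd
  implicit none
  type(network) :: net
  real :: x(3), y(2)
  integer :: n

  net = network([input(3), dense(5), dense(2)])
  x = [0.2, 0.4, 0.6]
  y = [0.1, 0.9]

  do n = 1, 1000
    call net % forward(x)
    call net % backward(y)
    ! Pass the optimizer explicitly, or call net % update() with no
    ! arguments to fall back to the default sgd(), as in example/simple.f90.
    call net % update(sgd(learning_rate=1.))
  end do

end program sgd_update_demo
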