Commit fedf098

Resolve conflicts with main
2 parents: fe02beb + e68e6c2

12 files changed: +521 -15 lines changed

CMakeLists.txt (+2)

@@ -37,6 +37,8 @@ add_library(neural-fortran
   src/nf/nf_input3d_layer_submodule.f90
   src/nf/nf_layer_constructors.f90
   src/nf/nf_layer_constructors_submodule.f90
+  src/nf/nf_layernorm.f90
+  src/nf/nf_layernorm_submodule.f90
   src/nf/nf_layer.f90
   src/nf/nf_layer_submodule.f90
   src/nf/nf_linear2d_layer.f90

README.md (+3 -2)

@@ -34,8 +34,9 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Dropout | `dropout` | `dense`, `flatten`, `input1d` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
-| Linear (2-d) | `linear2d` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
-| Self-attention | `self_attention` | `input2d`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Linear (2-d) | `linear2d` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Self-attention | `self_attention` | `input2d`, `layernorm`, `linear2d`, `self_attention` | 2 | ✅ | ✅ |
+| Layer Normalization | `layernorm` | `linear2d`, `self_attention` | 2 | ✅ | ✅ |
 | Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |
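
The new `layernorm` row slots between the 2-d producers (`linear2d`, `self_attention`) and downstream layers. As an illustration only (not part of this commit), a toy model using the new constructor could be assembled as below; the argument lists of `input` and `linear2d` are assumptions based on the rest of the library and should be checked against the current API.

  program layernorm_usage_sketch
    ! Hedged sketch: shows where layernorm() fits in a layer list.
    use nf, only: network, input, linear2d, layernorm, flatten, dense
    implicit none
    type(network) :: net

    net = network([ &
      input(3, 8), &    ! assumed: 2-d input of shape (sequence_length=3, model_dimension=8)
      linear2d(8), &    ! assumed: linear projection to 8 output features
      layernorm(), &    ! added in this commit; takes no arguments
      flatten(), &      ! permitted downstream of layernorm per the nf_layer_submodule changes
      dense(2) &
    ])

    print *, 'network with a layernorm layer constructed'
  end program layernorm_usage_sketch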

fpm.toml (+1 -1)

@@ -1,5 +1,5 @@
 name = "neural-fortran"
-version = "0.19.0"
+version = "0.20.0"
 license = "MIT"
 author = "Milan Curcic"
 maintainer = "mcurcic@miami.edu"

src/nf.f90 (+3 -2)

@@ -6,13 +6,14 @@ module nf
     conv2d, &
     dense, &
     dropout, &
+    embedding, &
     flatten, &
     input, &
+    layernorm, &
     linear2d, &
     maxpool2d, &
     reshape, &
-    self_attention, &
-    embedding
+    self_attention
   use nf_loss, only: mse, quadratic
   use nf_metrics, only: corr, maxabs
   use nf_network, only: network

src/nf/nf_layer_constructors.f90 (+17 -5)

@@ -18,7 +18,8 @@ module nf_layer_constructors
     maxpool2d, &
     reshape, &
     self_attention, &
-    embedding
+    embedding, &
+    layernorm

   interface input

@@ -239,14 +240,25 @@ module function embedding(sequence_length, vocab_size, model_dimension, position
       !! This layer is for inputting token indices from the dictionary to the network.
       !! Works as a trainable lookup table that converts each index into a vector.
       !! Embedding layer must be the first layer in a network.
-      !! `sequence_length`: max len of input sequence
-      !! `vocab_size`: length of token vocabulary
-      !! `model_dimension`: size of target embeddings
-      integer, intent(in) :: sequence_length, vocab_size, model_dimension
+      integer, intent(in) :: sequence_length
+        !! max length of the input sequence
+      integer, intent(in) :: vocab_size
+        !! length of the token vocabulary
+      integer, intent(in) :: model_dimension
+        !! size of the target embeddings
       integer, optional, intent(in) :: positional
+        !! positional encoding
       type(layer) :: res
     end function embedding

+    module function layernorm() result(res)
+      !! Layer Normalization
+      !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+      !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+      !! https://arxiv.org/abs/1607.06450v1
+      type(layer) :: res
+    end function layernorm
+
   end interface

 end module nf_layer_constructors
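
As a reference restatement (not text from this diff), the formula in the doc comment above is the standard layer normalization of Ba et al. (2016), with the mean and variance taken along the normalized dimension and learnable per-feature parameters gamma and beta:

  y = \gamma \odot \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}} + \beta,
  \qquad \mu = \operatorname{mean}(x), \quad \sigma^2 = \operatorname{variance}(x)

The usual parameter gradients, which presumably correspond to the d_gamma and d_beta fields of the layer type introduced later in this commit, reduce to

  \frac{\partial L}{\partial \gamma} = \sum_{\text{sequence}} \frac{\partial L}{\partial y} \odot \hat{x},
  \qquad \frac{\partial L}{\partial \beta} = \sum_{\text{sequence}} \frac{\partial L}{\partial y},
  \qquad \hat{x} = \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}}

where the sums run over the non-normalized (sequence) dimension.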

src/nf/nf_layer_constructors_submodule.f90 (+8)

@@ -13,6 +13,7 @@
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
   use nf_embedding_layer, only: embedding_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_activation, only: activation_function, relu, sigmoid

   implicit none

@@ -198,4 +199,11 @@ module function embedding(sequence_length, vocab_size, model_dimension, position

   end function embedding

+
+  module function layernorm() result(res)
+    type(layer) :: res
+    res % name = 'layernorm'
+    allocate(res % p, source=layernorm_layer())
+  end function layernorm
+
 end submodule nf_layer_constructors_submodule

src/nf/nf_layer_submodule.f90 (+49 -5)

@@ -13,6 +13,7 @@
   use nf_linear2d_layer, only: linear2d_layer
   use nf_self_attention_layer, only: self_attention_layer
   use nf_embedding_layer, only: embedding_layer
+  use nf_layernorm_layer, only: layernorm_layer
   use nf_optimizers, only: optimizer_base_type

 contains

@@ -47,7 +48,7 @@ pure module subroutine backward_1d(self, previous, gradient)

     type is(flatten_layer)

-      ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d
+      ! Upstream layers permitted: input2d, input3d, conv2d, layernorm, maxpool2d
       select type(prev_layer => previous % p)
         type is(input2d_layer)
          call this_layer % backward(prev_layer % output, gradient)

@@ -63,6 +64,8 @@ pure module subroutine backward_1d(self, previous, gradient)
          call this_layer % backward(prev_layer % output, gradient)
        type is(embedding_layer)
          call this_layer % backward(prev_layer % output, gradient)
+       type is(layernorm_layer)
+         call this_layer % backward(prev_layer % output, gradient)
      end select

  end select

@@ -89,6 +92,8 @@ pure module subroutine backward_2d(self, previous, gradient)
          call this_layer % backward(prev_layer % output, gradient)
        type is(self_attention_layer)
          call this_layer % backward(prev_layer % output, gradient)
+       type is(layernorm_layer)
+         call this_layer % backward(prev_layer % output, gradient)
      end select

    type is(self_attention_layer)

@@ -102,8 +107,18 @@ pure module subroutine backward_2d(self, previous, gradient)
          call this_layer % backward(prev_layer % output, gradient)
        type is(self_attention_layer)
          call this_layer % backward(prev_layer % output, gradient)
+       type is(layernorm_layer)
+         call this_layer % backward(prev_layer % output, gradient)
      end select

+   type is(layernorm_layer)
+
+     select type(prev_layer => previous % p)
+       type is(linear2d_layer)
+         call this_layer % backward(prev_layer % output, gradient)
+       type is(self_attention_layer)
+         call this_layer % backward(prev_layer % output, gradient)
+     end select
  end select

 end subroutine backward_2d

@@ -241,6 +256,8 @@ module subroutine forward(self, input)
          call this_layer % forward(prev_layer % output)
        type is(linear2d_layer)
          call this_layer % forward(prev_layer % output)
+       type is(layernorm_layer)
+         call this_layer % forward(prev_layer % output)
      end select

    type is(reshape3d_layer)

@@ -257,7 +274,7 @@ module subroutine forward(self, input)

    type is(linear2d_layer)

-     ! Upstream layers permitted: input2d, linear2d
+     ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
      select type(prev_layer => input % p)
        type is(input2d_layer)
          call this_layer % forward(prev_layer % output)

@@ -267,11 +284,13 @@ module subroutine forward(self, input)
          call this_layer % forward(prev_layer % output)
        type is(self_attention_layer)
          call this_layer % forward(prev_layer % output)
+       type is(layernorm_layer)
+         call this_layer % forward(prev_layer % output)
      end select

    type is(self_attention_layer)

-     ! Upstream layers permitted: input2d, linear2d
+     ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
      select type(prev_layer => input % p)
        type is(input2d_layer)
          call this_layer % forward(prev_layer % output)

@@ -281,6 +300,18 @@ module subroutine forward(self, input)
          call this_layer % forward(prev_layer % output)
        type is(self_attention_layer)
          call this_layer % forward(prev_layer % output)
+       type is(layernorm_layer)
+         call this_layer % forward(prev_layer % output)
+     end select
+
+   type is(layernorm_layer)
+
+     ! Upstream layers permitted: linear2d, self_attention
+     select type(prev_layer => input % p)
+       type is(linear2d_layer)
+         call this_layer % forward(prev_layer % output)
+       type is(self_attention_layer)
+         call this_layer % forward(prev_layer % output)
      end select

  end select

@@ -324,6 +355,8 @@ pure module subroutine get_output_2d(self, output)
      allocate(output, source=this_layer % output)
    type is(self_attention_layer)
      allocate(output, source=this_layer % output)
+   type is(layernorm_layer)
+     allocate(output, source=this_layer % output)
    class default
      error stop '2-d output can only be read from an input2d or linear2d layer.'

@@ -367,8 +400,8 @@ impure elemental module subroutine init(self, input)
      call this_layer % init(input % layer_shape)
  end select

- ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
- ! self_attention layers is not known until we receive an input layer.
+ ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
+ ! self_attention or layernorm layers is not known until we receive an input layer.
  select type(this_layer => self % p)
    type is(conv2d_layer)
      self % layer_shape = shape(this_layer % output)

@@ -380,6 +413,8 @@ impure elemental module subroutine init(self, input)
      self % layer_shape = shape(this_layer % output)
    type is(self_attention_layer)
      self % layer_shape = shape(this_layer % output)
+   type is(layernorm_layer)
+     self % layer_shape = shape(this_layer % output)
    type is(maxpool2d_layer)
      self % layer_shape = shape(this_layer % output)
  end select

@@ -440,6 +475,8 @@ elemental module function get_num_params(self) result(num_params)
      num_params = this_layer % get_num_params()
    type is (embedding_layer)
      num_params = this_layer % get_num_params()
+   type is (layernorm_layer)
+     num_params = this_layer % get_num_params()
    class default
      error stop 'Unknown layer type.'
  end select

@@ -475,6 +512,8 @@ module function get_params(self) result(params)
      params = this_layer % get_params()
    type is (embedding_layer)
      params = this_layer % get_params()
+   type is (layernorm_layer)
+     params = this_layer % get_params()
    class default
      error stop 'Unknown layer type.'
  end select

@@ -510,6 +549,8 @@ module function get_gradients(self) result(gradients)
      gradients = this_layer % get_gradients()
    type is (embedding_layer)
      gradients = this_layer % get_gradients()
+   type is (layernorm_layer)
+     gradients = this_layer % get_gradients()
    class default
      error stop 'Unknown layer type.'
  end select

@@ -570,6 +611,9 @@ module subroutine set_params(self, params)
    type is (embedding_layer)
      call this_layer % set_params(params)

+   type is (layernorm_layer)
+     call this_layer % set_params(params)
+
    type is (maxpool2d_layer)
      ! No parameters to set.
      write(stderr, '(a)') 'Warning: calling set_params() ' &

src/nf/nf_layernorm.f90 (new file, +92)

@@ -0,0 +1,92 @@
+module nf_layernorm_layer
+  use nf_activation, only: activation_function
+  use nf_base_layer, only: base_layer
+
+  implicit none
+
+  private
+  public :: layernorm_layer
+
+  type, extends(base_layer) :: layernorm_layer
+    !! Layer Normalization
+    !! (x - mean(x)) / sqrt(variance(x) + eps) * gamma + beta
+    !! Based upon `Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton (2016)`:
+    !! https://arxiv.org/abs/1607.06450v1
+    integer :: sequence_length
+    integer :: model_dimension
+
+    real :: eps
+    real, allocatable :: gamma(:)
+    real, allocatable :: beta(:)
+
+    real, allocatable :: d_gamma(:)
+    real, allocatable :: d_beta(:)
+    real, allocatable :: gradient(:, :)
+
+    real, allocatable :: mu(:, :)
+    real, allocatable :: sigma(:)
+
+    real, allocatable :: output(:, :)
+
+    ! temp storages
+    real, allocatable, private :: normalized(:, :)
+    real, allocatable, private :: one_over_sigma(:, :)
+    real, allocatable, private :: gradient_by_gamma_over_sigma(:, :)
+  contains
+    procedure :: forward
+    procedure :: backward
+    procedure :: init
+    procedure :: get_num_params
+    procedure :: get_params
+    procedure :: get_gradients
+    procedure :: set_params
+  end type layernorm_layer
+
+  interface layernorm_layer
+    module function layernorm_layer_cons() result(res)
+      type(layernorm_layer) :: res
+    end function layernorm_layer_cons
+  end interface layernorm_layer
+
+  interface
+    pure module subroutine forward(self, input)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    module subroutine init(self, input_shape)
+      class(layernorm_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+
+    pure module function get_num_params(self) result(num_params)
+      class(layernorm_layer), intent(in) :: self
+      integer :: num_params
+    end function get_num_params
+
+    module function get_params(self) result(params)
+      class(layernorm_layer), intent(in), target :: self
+      real, allocatable :: params(:)
+    end function get_params
+
+    module function get_gradients(self) result(gradients)
+      class(layernorm_layer), intent(in), target :: self
+      real, allocatable :: gradients(:)
+    end function get_gradients
+
+    module subroutine set_params(self, params)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in), target :: params(:)
+    end subroutine set_params
+  end interface
+end module nf_layernorm_layer
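
The matching submodule (src/nf/nf_layernorm_submodule.f90, listed in CMakeLists.txt above) is not shown in this excerpt. Purely as a hedged sketch of what the declared `forward` interface is expected to compute, the following self-contained program normalizes each row of a (sequence_length, model_dimension) array over the model dimension and applies gamma and beta; it is not the code from this commit.

  program layernorm_forward_demo
    ! Minimal sketch, not the submodule added in this commit.
    implicit none
    real :: x(3, 4), y(3, 4)
    real :: gamma(4), beta(4)

    call random_number(x)
    gamma = 1.0
    beta = 0.0
    call layernorm_forward(x, gamma, beta, 1.0e-5, y)
    print '(4f10.5)', transpose(y)   ! print row by row

  contains

    pure subroutine layernorm_forward(input, gamma, beta, eps, output)
      real, intent(in) :: input(:, :)        ! (sequence_length, model_dimension)
      real, intent(in) :: gamma(:), beta(:)  ! (model_dimension)
      real, intent(in) :: eps
      real, intent(out) :: output(size(input, 1), size(input, 2))
      real :: mu, var
      integer :: i, d
      d = size(input, 2)
      do i = 1, size(input, 1)
        ! Mean and (biased) variance over the model dimension of row i.
        mu = sum(input(i, :)) / d
        var = sum((input(i, :) - mu)**2) / d
        ! Normalize, then scale by gamma and shift by beta.
        output(i, :) = (input(i, :) - mu) / sqrt(var + eps) * gamma + beta
      end do
    end subroutine layernorm_forward

  end program layernorm_forward_demo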
