13
13
use nf_linear2d_layer, only: linear2d_layer
14
14
use nf_self_attention_layer, only: self_attention_layer
15
15
use nf_embedding_layer, only: embedding_layer
16
+ use nf_layernorm_layer, only: layernorm_layer
16
17
use nf_optimizers, only: optimizer_base_type
17
18
18
19
contains
@@ -47,7 +48,7 @@ pure module subroutine backward_1d(self, previous, gradient)
47
48
48
49
type is (flatten_layer)
49
50
50
- ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d
51
+ ! Upstream layers permitted: input2d, input3d, conv2d, embedding, layernorm, maxpool2d
51
52
select type (prev_layer = > previous % p)
52
53
type is (input2d_layer)
53
54
call this_layer % backward(prev_layer % output, gradient)
@@ -63,6 +64,8 @@ pure module subroutine backward_1d(self, previous, gradient)
63
64
call this_layer % backward(prev_layer % output, gradient)
64
65
type is (embedding_layer)
65
66
call this_layer % backward(prev_layer % output, gradient)
67
+ type is (layernorm_layer)
68
+ call this_layer % backward(prev_layer % output, gradient)
66
69
end select
67
70
68
71
end select
@@ -89,6 +92,8 @@ pure module subroutine backward_2d(self, previous, gradient)
89
92
call this_layer % backward(prev_layer % output, gradient)
90
93
type is (self_attention_layer)
91
94
call this_layer % backward(prev_layer % output, gradient)
95
+ type is (layernorm_layer)
96
+ call this_layer % backward(prev_layer % output, gradient)
92
97
end select
93
98
94
99
type is (self_attention_layer)
@@ -102,8 +107,18 @@ pure module subroutine backward_2d(self, previous, gradient)
102
107
call this_layer % backward(prev_layer % output, gradient)
103
108
type is (self_attention_layer)
104
109
call this_layer % backward(prev_layer % output, gradient)
110
+ type is (layernorm_layer)
111
+ call this_layer % backward(prev_layer % output, gradient)
105
112
end select
106
113
114
+ type is (layernorm_layer)
115
+
116
+ select type (prev_layer = > previous % p)
117
+ type is (linear2d_layer)
118
+ call this_layer % backward(prev_layer % output, gradient)
119
+ type is (self_attention_layer)
120
+ call this_layer % backward(prev_layer % output, gradient)
121
+ end select
107
122
end select
108
123
109
124
end subroutine backward_2d
@@ -241,6 +256,8 @@ module subroutine forward(self, input)
241
256
call this_layer % forward(prev_layer % output)
242
257
type is (linear2d_layer)
243
258
call this_layer % forward(prev_layer % output)
259
+ type is (layernorm_layer)
260
+ call this_layer % forward(prev_layer % output)
244
261
end select
245
262
246
263
type is (reshape3d_layer)
@@ -257,7 +274,7 @@ module subroutine forward(self, input)
257
274
258
275
type is (linear2d_layer)
259
276
260
- ! Upstream layers permitted: input2d, linear2d
277
+ ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
261
278
select type (prev_layer = > input % p)
262
279
type is (input2d_layer)
263
280
call this_layer % forward(prev_layer % output)
@@ -267,11 +284,13 @@ module subroutine forward(self, input)
267
284
call this_layer % forward(prev_layer % output)
268
285
type is (self_attention_layer)
269
286
call this_layer % forward(prev_layer % output)
287
+ type is (layernorm_layer)
288
+ call this_layer % forward(prev_layer % output)
270
289
end select
271
290
272
291
type is (self_attention_layer)
273
292
274
- ! Upstream layers permitted: input2d, linear2d
293
+ ! Upstream layers permitted: input2d, linear2d, self_attention, layernorm
275
294
select type (prev_layer = > input % p)
276
295
type is (input2d_layer)
277
296
call this_layer % forward(prev_layer % output)
@@ -281,6 +300,18 @@ module subroutine forward(self, input)
281
300
call this_layer % forward(prev_layer % output)
282
301
type is (self_attention_layer)
283
302
call this_layer % forward(prev_layer % output)
303
+ type is (layernorm_layer)
304
+ call this_layer % forward(prev_layer % output)
305
+ end select
306
+
307
+ type is (layernorm_layer)
308
+
309
+ ! Upstream layers permitted: linear2d, self_attention
310
+ select type (prev_layer = > input % p)
311
+ type is (linear2d_layer)
312
+ call this_layer % forward(prev_layer % output)
313
+ type is (self_attention_layer)
314
+ call this_layer % forward(prev_layer % output)
284
315
end select
285
316
286
317
end select
@@ -324,6 +355,8 @@ pure module subroutine get_output_2d(self, output)
324
355
allocate (output, source= this_layer % output)
325
356
type is (self_attention_layer)
326
357
allocate (output, source= this_layer % output)
358
+ type is (layernorm_layer)
359
+ allocate (output, source= this_layer % output)
327
360
class default
328
361
error stop ' 2-d output can only be read from an input2d or linear2d layer.'
329
362
@@ -367,8 +400,8 @@ impure elemental module subroutine init(self, input)
367
400
call this_layer % init(input % layer_shape)
368
401
end select
369
402
370
- ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d, or
371
- ! self_attention layers is not known until we receive an input layer.
403
+ ! The shape of conv2d, dropout, flatten, linear2d, maxpool2d,
404
+ ! self_attention or layernorm layers is not known until we receive an input layer.
372
405
select type (this_layer = > self % p)
373
406
type is (conv2d_layer)
374
407
self % layer_shape = shape (this_layer % output)
@@ -380,6 +413,8 @@ impure elemental module subroutine init(self, input)
380
413
self % layer_shape = shape (this_layer % output)
381
414
type is (self_attention_layer)
382
415
self % layer_shape = shape (this_layer % output)
416
+ type is (layernorm_layer)
417
+ self % layer_shape = shape (this_layer % output)
383
418
type is (maxpool2d_layer)
384
419
self % layer_shape = shape (this_layer % output)
385
420
end select
@@ -440,6 +475,8 @@ elemental module function get_num_params(self) result(num_params)
440
475
num_params = this_layer % get_num_params()
441
476
type is (embedding_layer)
442
477
num_params = this_layer % get_num_params()
478
+ type is (layernorm_layer)
479
+ num_params = this_layer % get_num_params()
443
480
class default
444
481
error stop ' Unknown layer type.'
445
482
end select
@@ -475,6 +512,8 @@ module function get_params(self) result(params)
475
512
params = this_layer % get_params()
476
513
type is (embedding_layer)
477
514
params = this_layer % get_params()
515
+ type is (layernorm_layer)
516
+ params = this_layer % get_params()
478
517
class default
479
518
error stop ' Unknown layer type.'
480
519
end select
@@ -510,6 +549,8 @@ module function get_gradients(self) result(gradients)
510
549
gradients = this_layer % get_gradients()
511
550
type is (embedding_layer)
512
551
gradients = this_layer % get_gradients()
552
+ type is (layernorm_layer)
553
+ gradients = this_layer % get_gradients()
513
554
class default
514
555
error stop ' Unknown layer type.'
515
556
end select
@@ -570,6 +611,9 @@ module subroutine set_params(self, params)
570
611
type is (embedding_layer)
571
612
call this_layer % set_params(params)
572
613
614
+ type is (layernorm_layer)
615
+ call this_layer % set_params(params)
616
+
573
617
type is (maxpool2d_layer)
574
618
! No parameters to set.
575
619
write (stderr, ' (a)' ) ' Warning: calling set_params() ' &
0 commit comments