Open
Description
I tried hard to find an example of layer_attention for Keras in RStudio's library, but didn't find one.
I got the net working in two ways before the dense layers:
(1) LSTM(return_sequences = T) -> Attention -> LSTM(return_sequences = F);
(2) LSTM(return_sequences = T) -> Attention -> Flatten.
Note that layer_flatten is commented out in my code; to try the second variant, uncomment it and comment out the second layer_lstm instead. Both approaches output a 1D tensor, which at least seems to fit the expected dimensionality of the NN output.
Which of the two is the correct, or the more sensible, way? I am not very experienced in this field...
library(keras)
# Drop a model left over from a previous run of this script. Guarded so that a
# fresh session (where `nn_model` does not exist yet) does not raise a warning.
if (exists("nn_model", inherits = FALSE)) {
  rm(nn_model)
}

# Hyper-parameters shared by the model definition and the smoke test below.
lstm_units <- 16L    # units in each LSTM layer
lstm_seq_len <- 4L   # time steps per input sequence
nfeatures <- 2L      # features per time step
final_diffs <- 1:3   # class labels; its length sets the size of the softmax output
# Input tensor: one sample is `lstm_seq_len` time steps of `nfeatures` values.
inputs <- layer_input(shape = list(lstm_seq_len, nfeatures))

# First recurrent layer. `return_sequences = TRUE` keeps the output for every
# time step so the attention layer downstream receives a full sequence.
# No `input_shape` is passed: the layer is applied to `inputs`, which already
# carries the shape.
# NOTE(review): `activation = "relu"` replaces the LSTM default activation;
# confirm this is intentional.
lstm_output <-
  inputs %>%
  layer_lstm(
    units = lstm_units,
    activation = "relu",
    return_sequences = TRUE,
    stateful = FALSE,
    name = "lstm1"
  )
# Classification head: self-attention over the lstm1 sequence, a second LSTM
# that reduces the sequence to a single vector, two dense -> batch-norm -> ReLU
# stages, and a softmax with one output per element of `final_diffs`.
predictions <-
  layer_attention(
    # The same tensor is supplied as both list elements (query and value),
    # i.e. the sequence attends to itself.
    inputs = list(lstm_output, lstm_output),
    use_scale = FALSE,
    # All other constructor arguments are left at their defaults.
    name = "attention"
  ) %>%
  # No `input_shape` here: this layer consumes the attention output, whose
  # feature width is `lstm_units` rather than `nfeatures`, and the shape is
  # taken from the incoming tensor anyway.
  layer_lstm(
    units = lstm_units,
    activation = "relu",
    return_sequences = FALSE,
    stateful = FALSE,
    name = "lstm2"
  ) %>%
  # Alternative to lstm2 (comment out the layer_lstm() call above and enable
  # the next line instead):
  # layer_flatten() %>%
  layer_dense(units = 64L, activation = NULL, name = "dense1") %>%
  layer_batch_normalization(name = "bn1") %>%
  layer_activation(activation = "relu", name = "act1") %>%
  layer_dense(units = 32L, activation = NULL, name = "dense2") %>%
  layer_batch_normalization(name = "bn2") %>%
  layer_activation(activation = "relu", name = "act2") %>%
  layer_dense(
    units = length(final_diffs),
    activation = "softmax",
    name = "dense3"
  )
# Adam optimizer with a step size of 1e-5.
# NOTE(review): newer keras releases name this argument `learning_rate`;
# confirm against the installed version before renaming.
optimizer <- optimizer_adam(lr = 1e-5)

# Functional-API model mapping `inputs` to the softmax `predictions`.
nn_model <- keras_model(inputs = inputs, outputs = predictions)

# Attach optimizer, loss and metric. The result is not reassigned; the script
# relies on compile() configuring `nn_model` itself.
keras::compile(
  nn_model,
  optimizer = optimizer,
  loss = "categorical_crossentropy",
  metrics = "categorical_accuracy"
)
# Print the layer-by-layer overview of the model.
summary(nn_model)

# Smoke test: run one random sample, laid out as a 1 x lstm_seq_len x nfeatures
# array of uniform draws in [0, 1], through the network and print the result.
nn_model %>%
  predict(
    array(
      runif(n = lstm_seq_len * nfeatures, min = 0, max = 1),
      dim = c(1, lstm_seq_len, nfeatures)
    )
  )