
Commit 13a07cb

update: GNN+RNN
1 parent daacfff commit 13a07cb

73 files changed: +7860 −86 lines changed


A4-MLpipeline.qmd

Lines changed: 1 addition & 1 deletion
@@ -549,7 +549,7 @@ As we have seen today, many of the machine learning algorithms are distributed o

 Machine learning frameworks such as `mlr3` or `tidymodels` provide a general interface for the ML pipeline, in particular the training and the hyperparameter tuning with nested CV. They support most ML packages/algorithms.

-### mlr3 {#mlr}
+### mlr3 {#sec-mlr}

 The key features of mlr3 are:
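The paragraph above describes the shared interface these frameworks expose. As a rough illustration, here is a minimal mlr3 sketch (illustrative only, not part of this commit; it assumes the mlr3, mlr3learners and ranger packages and mlr3's built-in iris task):

```r
library(mlr3)
library(mlr3learners)

# built-in classification task and a random forest learner
task = tsk("iris")
learner = lrn("classif.ranger")

# 5-fold cross-validation of the whole pipeline
rr = resample(task, learner, rsmp("cv", folds = 5))
rr$aggregate(msr("classif.acc"))
```

Nested CV for hyperparameter tuning would wrap the learner in an AutoTuner from mlr3tuning and resample that tuner in the same way.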

C2-DeepNeuralNetworks.qmd

Lines changed: 1 addition & 3 deletions
@@ -374,14 +374,12 @@ What is the "mean_squared_error" loss?
 ```{r}
 model_history =
   model %>%
-    fit(x = x, y = matrix(y, ncol = 1L), epochs = 100L,
+    fit(x = x, y = as.numeric(y), epochs = 100L,
         batch_size = 20L, shuffle = TRUE)
 ```

 `r unhide()`

-Tip: Only matrices are accepted for $\boldsymbol{X}$ and $\boldsymbol{y}$ by Keras. R often drops a one column matrix into a vector (change it back to a matrix!)
-
 7. **Plot training history.**

 `r hide("Solution")`
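The change above passes the target to fit() as a plain numeric vector instead of a one-column matrix, and the old tip that Keras only accepts matrices is dropped. A small sketch with toy data (illustrative only, not part of this commit; assumes the keras R package) showing the vector form in a single-output regression:

```r
library(keras)

# toy data: 100 observations, 2 features
x = matrix(runif(200), ncol = 2L)
y = x[, 1] - x[, 2] + rnorm(100, sd = 0.1)

model = keras_model_sequential() %>%
  layer_dense(units = 20L, activation = "relu", input_shape = 2L) %>%
  layer_dense(units = 1L)

model %>% compile(loss = "mean_squared_error", optimizer = optimizer_adamax())

# a numeric vector works as the target here; matrix(y, ncol = 1L) is the equivalent matrix form
model %>% fit(x = x, y = as.numeric(y), epochs = 10L, batch_size = 20L, verbose = 0L)
```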

C3-ConvolutionalNeuralNetworks.qmd

Lines changed: 1 addition & 1 deletion
@@ -609,7 +609,7 @@ print(test_accuracy)
 ```
 :::

-### Transfer Learning {#transfer}
+### Transfer Learning {#sec-transfer}

 Another approach to reduce the necessary number of images or to speed up convergence of the models is the use of transfer learning.
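Transfer learning here means reusing a network pretrained on a large dataset (e.g. ImageNet) as a feature extractor and training only a small new head. A minimal sketch with the keras R package (illustrative only, not part of this commit; the pretrained architecture, input size and binary head are arbitrary choices):

```r
library(keras)

# pretrained convolutional base without its classification head
base = application_mobilenet_v2(weights = "imagenet", include_top = FALSE,
                                input_shape = c(128L, 128L, 3L))
freeze_weights(base)  # keep the pretrained filters fixed, train only the new head

outputs = base$output %>%
  layer_global_average_pooling_2d() %>%
  layer_dense(units = 1L, activation = "sigmoid")

model = keras_model(inputs = base$input, outputs = outputs)
model %>% compile(loss = "binary_crossentropy",
                  optimizer = optimizer_adamax(learning_rate = 0.001))
summary(model)
```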

C4-RecurrentNeuralNetworks.qmd

Lines changed: 203 additions & 3 deletions
@@ -4,9 +4,209 @@ editor_options:

The old chapter stub is removed:

-# Recurrent Neural Networks
-Artificial neural networks are biologically inspired, the idea is that inputs are processed by weights, the neurons, the signals then accumulate at hidden nodes (axioms), and only if the sum of activations of several neurons exceed a certain threshold, the signal will be passed on.
-library(cito)

The new chapter content:

# Recurrent Neural Networks (RNN)

```{r}
#| echo: false
#| include: false
#| results: false
reticulate::use_condaenv("r-reticulate")
library(tensorflow)
tf
tf$abs(3.)
```

Recurrent neural networks are used to model sequential data, i.e. temporal sequences that exhibit temporal dynamic behavior. Here is a good introduction to the topic:

```{r chunk_chapter5_0, eval=knitr::is_html_output(excludes = "epub"), results='asis', echo=FALSE}
cat(
  '<iframe width="560" height="315"
  src="https://www.youtube.com/embed/SEnXr6v2ifU"
  frameborder="0" allow="accelerometer; autoplay; encrypted-media;
  gyroscope; picture-in-picture" allowfullscreen>
  </iframe>'
)
```
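Before the case study, it helps to know the input format a recurrent layer expects: a 3-D array of shape (samples, time steps, features). A minimal sketch with toy data (illustrative only, not part of this commit; assumes the keras R package):

```r
library(keras)

# toy sequential data: 200 series, 30 time steps, 3 features each
X_toy = array(runif(200 * 30 * 3), dim = c(200, 30, 3))
y_toy = apply(X_toy[, , 1], 1, mean)  # toy target: mean of the first feature over time

model_toy = keras_model_sequential() %>%
  layer_lstm(units = 16L, input_shape = c(30L, 3L)) %>%
  layer_dense(units = 1L)

model_toy %>% compile(loss = "mean_squared_error", optimizer = optimizer_adamax())
model_toy %>% fit(x = X_toy, y = y_toy, epochs = 5L, batch_size = 32L, verbose = 0L)
```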
## Case Study: Predicting drought

We will use a subset of the data explained in [this GitHub repository](https://github.com/Epistoteles/predicting-drought).

```{r chunk_chapter5_0_Rnn, message=FALSE, warning=FALSE}
utils::download.file("https://www.dropbox.com/s/radyscnl5zcf57b/weather_soil.RDS?raw=1",
                     destfile = "weather_soil.RDS")
data = readRDS("weather_soil.RDS")
X = data$train # features of the last 180 days
dim(X)
# 999 samples of 180 days with 21 features each
Y = data$target
dim(Y)
# 999 samples of 6-week drought predictions

# let's visualize drought for the first 16 samples (16 * 6 = 96 weeks, roughly two years):
plot(as.vector(Y[1:16,]), type = "l", xlab = "week", ylab = "Drought")
```
```{r chunk_chapter5_1_Rnn, message=FALSE, warning=FALSE}
library(keras)

holdout = 700:999
X_train = X[-holdout,,]
X_test = X[holdout,,]

Y_train = Y[-holdout,]
Y_test = Y[holdout,]

model = keras_model_sequential()
model %>%
  layer_rnn(cell = layer_lstm_cell(units = 60L), input_shape = dim(X)[2:3]) %>%
  layer_dense(units = 6L)

model %>% compile(loss = loss_mean_squared_error,
                  optimizer = optimizer_adamax(learning_rate = 0.01))

model %>% fit(x = X_train, y = Y_train, epochs = 30L)

preds = model %>% predict(X_test)

# plot predictions against the true values for the first 48 holdout samples
matplot(cbind(as.vector(preds[1:48,]),
              as.vector(Y_test[1:48,])),
        col = c("darkblue", "darkred"),
        type = "o",
        pch = c(15, 16),
        xlab = "week", ylab = "Drought")
legend("topright", bty = "n",
       col = c("darkblue", "darkred"),
       pch = c(15, 16),
       legend = c("Prediction", "True Values"))
```
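To put a number on the visual fit, a short follow-up sketch (illustrative only, not part of this commit) using the objects defined in the chunk above:

```r
# root mean squared error and correlation between predictions and true drought values
rmse = sqrt(mean((as.vector(preds) - as.vector(Y_test))^2))
pearson = cor(as.vector(preds), as.vector(Y_test))
c(RMSE = rmse, Pearson = pearson)
```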
The following code snippet shows many of the (technical) building blocks you need for more complex network structures, including LSTM cells. The example has no real functionality; it only demonstrates how to process two different inputs in different ways within one network:

::: panel-tabset
## Keras

```{r chunk_chapter5_1, message=FALSE, warning=FALSE}
library(tensorflow)
library(keras)
set_random_seed(321L, disable_gpu = FALSE) # Already sets R's random seed.

tf$keras$backend$clear_session() # Resets especially layer counter.

inputDimension1 = 50L
inputDimension2 = 10L

input1 = layer_input(shape = inputDimension1)
input2 = layer_input(shape = inputDimension2)

# second input: dropout + dense layer
modelInput2 = input2 %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = inputDimension2, activation = "gelu")

# first input, branch 1: embedding + LSTM
modelMemory = input1 %>%
  layer_embedding(input_dim = inputDimension1, output_dim = 64L) %>%
  layer_lstm(units = 64L) %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 2L, activation = "sigmoid")

# first input, branch 2: plain feed-forward layers
modelDeep = input1 %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dense(units = 5L, activation = "sigmoid")

# concatenate all three branches and add a common head
modelMain = layer_concatenate(c(modelMemory, modelDeep, modelInput2)) %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dense(units = 2L, activation = "sigmoid")

model = keras_model(
  inputs = c(input1, input2),
  outputs = c(modelMain) # Use the whole modelMain (resp. its output) as output.
)

summary(model)
# model %>% plot_model()
```

## Torch

```{r chunk_chapter5_1_torch, message=FALSE, warning=FALSE}
library(torch)

model_torch = nn_module(
  initialize = function(inputDimension1 = 50L, inputDimension2 = 10L) {
    self$dim1 = inputDimension1
    self$dim2 = inputDimension2

    # second input: dropout + linear layer
    self$modelInput2 = nn_sequential(
      nn_dropout(0.5),
      nn_linear(in_features = self$dim2, out_features = self$dim2),
      nn_selu()
    )

    # first input, branch 1: embedding + LSTM
    self$modelMemory = nn_sequential(
      nn_embedding(self$dim1, 64),
      nn_lstm(64, 64)
    )
    self$modelMemoryOutput = nn_sequential(
      nn_dropout(0.5),
      nn_linear(64L, 2L),
      nn_sigmoid()
    )

    # first input, branch 2: plain feed-forward layers
    self$modelDeep = nn_sequential(
      nn_dropout(0.5),
      nn_linear(self$dim1, 64L),
      nn_relu(),
      nn_dropout(0.3),
      nn_linear(64, 64),
      nn_relu(),
      nn_linear(64, 64),
      nn_relu(),
      nn_linear(64, 5),
      nn_sigmoid()
    )

    # common head on the concatenated branch outputs (2 + dim2 + 5 = 7 + dim2 features)
    self$modelMain = nn_sequential(
      nn_linear(7 + self$dim2, 64),
      nn_relu(),
      nn_dropout(0.5),
      nn_linear(64, 64),
      nn_relu(),
      nn_dropout(),
      nn_linear(64, 2),
      nn_sigmoid()
    )
  },

  forward = function(x) {
    input1 = x[[1]]
    input2 = x[[2]]
    out2 = self$modelInput2(input2)
    out1 = self$modelMemoryOutput( self$modelMemory(input1)$view(list(dim(input1)[1], -1)) )
    out3 = self$modelDeep(input1)
    out = self$modelMain(torch_cat(list(out1, out2, out3), 2))
    return(out)
  }
)

(model_torch())
```

:::
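How such a two-input model would be trained is not shown in the chapter. A hypothetical usage sketch for the Keras variant above (illustrative only, not part of this commit; the dummy data and the loss are arbitrary choices):

```r
# dummy data matching the two input shapes (integer codes 0..49 for the embedding branch)
n = 100L
x1 = matrix(sample(0:49, n * 50L, replace = TRUE), ncol = 50L)
x2 = matrix(runif(n * 10L), ncol = 10L)
y  = matrix(runif(n * 2L), ncol = 2L)

# multi-input models take a list of inputs in fit()
model %>% compile(loss = "binary_crossentropy", optimizer = optimizer_adamax())
model %>% fit(x = list(x1, x2), y = y, epochs = 2L, batch_size = 32L, verbose = 0L)
```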

C5-GNN.qmd

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
---
output: html_document
editor_options:
  chunk_output_type: console
---

# Graph Neural Networks (GNNs)

```{r}
#| echo: false
#| include: false
#| results: false
reticulate::use_condaenv("r-reticulate")
library(tensorflow)
tf
tf$abs(3.)
```

Graph neural networks (GNNs) are a young member of the deep neural network family, but they have received more and more attention in recent years because of their ability to process non-Euclidean data such as graphs.

Currently there is no R package for GNNs available. However, we can use the 'reticulate' package to access the Python packages 'torch' and 'torch_geometric'.

The following example was mostly adapted from the 'Graph Classification with Graph Neural Networks' example in the [torch_geometric documentation](https://pytorchgeometric.readthedocs.io/en/latest/notes/colabs.html).

The dataset (the MUTAG benchmark) is also provided by the 'torch_geometric' package and consists of molecules represented as graphs; the task is binary graph classification, i.e. predicting whether or not a molecule has a mutagenic effect on a bacterium.
```{r GNN_1, eval=FALSE}
library(reticulate)
# Load python packages torch and torch_geometric via the reticulate R package
torch = import("torch")
torch_geometric = import("torch_geometric")

# helper functions from the torch_geometric modules
GCNConv = torch_geometric$nn$GCNConv
global_mean_pool = torch_geometric$nn$global_mean_pool

# Download the MUTAG TUDataset
dataset = torch_geometric$datasets$TUDataset(root = 'data/TUDataset',
                                             name = 'MUTAG')
dataloader = torch_geometric$loader$DataLoader(dataset,
                                               batch_size = 64L,
                                               shuffle = TRUE)

# Create the model with a python class
# There are two classes in the response variable
GCN = PyClass(
  "GCN",
  inherit = torch$nn$Module,
  defs = list(
    `__init__` = function(self, hidden_channels) {
      super()$`__init__`()
      torch$manual_seed(42L)
      self$conv = GCNConv(dataset$num_node_features, hidden_channels)
      self$linear = torch$nn$Linear(hidden_channels, dataset$num_classes)
      NULL
    },
    forward = function(self, x, edge_index, batch) {
      # node embeddings from one graph convolution
      x = self$conv(x, edge_index)
      x = x$relu()
      # aggregate the node embeddings into one embedding per graph
      x = global_mean_pool(x, batch)

      x = torch$nn$functional$dropout(x, p = 0.5, training = self$training)
      x = self$linear(x)
      return(x)
    }
  ))
```
Training loop:

```{r GNN_2, eval=FALSE}
# create model object
model = GCN(hidden_channels = 64L)

# get optimizer and loss function
optimizer = torch$optim$Adamax(model$parameters(), lr = 0.01)
loss_func = torch$nn$CrossEntropyLoss()

# set model into training mode (because of the dropout layer)
model$train()

# train model
for(e in 1:50) {
  iterator = reticulate::as_iterator(dataloader)
  coro::loop(for (b in iterator) {
    pred = model(b$x, b$edge_index, b$batch)
    loss = loss_func(pred, b$y)
    loss$backward()
    optimizer$step()
    optimizer$zero_grad()
  })
  if(e %% 10 == 0) cat(paste0("Epoch: ", e, " Loss: ", round(loss$item()[1], 4), "\n"))
}
## Epoch: 10 Loss: 0.6151
## Epoch: 20 Loss: 0.6163
## Epoch: 30 Loss: 0.5745
## Epoch: 40 Loss: 0.5362
## Epoch: 50 Loss: 0.5829
```
Make predictions:

```{r GNN_3, eval = FALSE}
preds = list()
test = torch_geometric$loader$DataLoader(dataset, batch_size = 64L, shuffle = FALSE)
iterator = reticulate::as_iterator(test)
model$eval()   # switch off dropout for prediction
counter = 1
coro::loop(for (b in iterator) {
  preds[[counter]] = model(b$x, b$edge_index, b$batch)
  counter <<- counter + 1
})
head(torch$concat(preds)$sigmoid()$data$cpu()$numpy(), n = 3)
##           [,1]      [,2]
## [1,] 0.3076028 0.6427078
## [2,] 0.4121239 0.5515330
## [3,] 0.4119514 0.5516798
```
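A possible follow-up sketch (illustrative only, not part of this commit; it assumes the collated graph labels are accessible as dataset$data$y) that turns the two output columns into class labels and compares them with the stored labels. Note that this is accuracy on the data the model was trained on, not on an independent test set:

```r
logits = torch$concat(preds)$data$cpu()$numpy()
pred_class = max.col(logits) - 1L     # argmax over the two columns -> 0/1 labels
true_class = dataset$data$y$numpy()   # concatenated graph labels of the TUDataset (assumption)
mean(pred_class == true_class)        # fraction of correctly classified molecules
```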
