From fe985c43b345b7d144b369271696e44d10dfbf44 Mon Sep 17 00:00:00 2001
From: Witold Wolski <wewolski@gmail.com>
Date: Wed, 22 Jun 2022 16:12:25 +0200
Subject: [PATCH] vignettte with interactions.

---
 vignettes/ModelWithInteractions.Rmd | 261 ++++++++++++++++++++++++++++
 vignettes/Modelling2Factors.Rmd     |   2 -
 2 files changed, 261 insertions(+), 2 deletions(-)
 create mode 100644 vignettes/ModelWithInteractions.Rmd

diff --git a/vignettes/ModelWithInteractions.Rmd b/vignettes/ModelWithInteractions.Rmd
new file mode 100644
index 000000000..7dbeb1f73
--- /dev/null
+++ b/vignettes/ModelWithInteractions.Rmd
@@ -0,0 +1,261 @@
+---
+title: "Modelling with Interactions."
+author: "Witold Wolski"
+date: "`r format(Sys.time(), '%d %B, %Y')`"
+output:
+  html_document: default
+  pdf_document: default
+vignette: |
+  %\VignetteIndexEntry{Modelling with Interactions.}  
+  %\VignetteEncoding{UTF-8}   
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options: 
+  chunk_output_type: console
+---
+
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
+```
+
+
+
+
+
+
+```{r}
+a <- c(a = 3,b = 4.5, c = 6, d = 7.5, e = 9)
+f1 <- list(l1 = 3,l2 = 4.5)
+f2 <- list(l1 = 0, l2 = 3) 
+
+a =  f1$l1 + f2$l1
+b = f1$l2 + f2$l1
+c = f1$l1 + f2$l2
+d = f1$l2 + f2$l2
+c(a,b,c,d)
+
+```
+
+```{r LoadDataAndConfigure}
+datadir <- file.path(find.package("prolfqua") , "samples/maxquant_txt")
+inputMQfile <-  file.path(datadir, "tiny2.zip")
+inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")
+startdata <- prolfqua::tidyMQ_ProteinGroups(inputMQfile)
+```
+
+Read the sample annotation. The sample annotation must contain the `raw.file` name and the explanatory variables of your experiment, e.g. treatment, timepoint, genetic background, or other factors which you would like to check for confounding.
+
+
+```{r readAnnotation}
+annotation <- readxl::read_xlsx(inputAnnotation)
+head(annotation)
+annotation <- annotation |> dplyr::filter(sample != "e")
+annotation <- annotation |> 
+  dplyr::mutate(f1 = dplyr::case_when(sample %in% c("a","c") ~ "l1", TRUE ~ "l2"),
+                f2 = dplyr::case_when(sample %in% c("a","b") ~ "l1", TRUE ~ "l2")) |>
+  dplyr::arrange(sample)
+```
+
+Merge the annotation with quantitative data using `inner_join` joining by `raw.file`.
+
+```{r addAnnotationToData}
+startdata <- dplyr::inner_join(annotation, startdata, by = "raw.file")
+```
+
+We remove all proteins identified only by a single peptide.
+
+
+```{r filterForAtLeastTwoPeptides}
+startdata <- dplyr::filter(startdata, nr.peptides > 1)
+```
+
+
+Then you need to _tell_ `prolfqua` which columns in the data frame contain what information. You do it using the `AnalysisTableAnnotation` class.
+
+```{r setupConfigs}
+atable <- prolfqua::AnalysisTableAnnotation$new()
+```
+
+The `AnalysisTableAnnotation` has the following fields that need to be populated:
+
+- fileName
+- hierarchy
+- factors
+- workingIntensity
+
+, and we will discuss in more detail next.
+
+
+The `fileName` is the column with the raw file names, however for labelled TMT experiments, it can be used to hold the name of the TMT channel.
+
+```{r specifyRawFile}
+atable$fileName = "raw.file"
+```
+
+The `hierarchy` field describes the structure of the MS data e.g, 
+
+- protein 
+- peptides
+- modified peptides
+- precursor
+
+In case of the MQ proteinGroups file we have data on protein level.
+
+```{r specifyProteinID}
+atable$hierarchy[["protein_Id"]] <- c("proteinID")
+
+```
+
+In addition you need to describe the `factors` of the analysis, i.e, the column containing the explanatory variables. 
+
+```{r specifyFactors}
+atable$factors[["f1."]] = "f1"
+atable$factors[["f2."]] = "f2"
+
+```
+
+We also need to specify the column containing the protein abundances.
+
+```{r specifyIntensity}
+atable$setWorkIntensity("mq.protein.intensity")
+```
+
+Finally we create the `AnalysisConfiguration` which needs the `AnalysisTableAnnotation` we just created and the `AnalysisParameters`. 
+
+```{r createAnalysisConfig}
+config <- prolfqua::AnalysisConfiguration$new(atable)
+adata <- prolfqua::setup_analysis(startdata, config)
+
+```
+
+Create the `LFQData` class instance and remove zeros from data (MaxQuant encodes missing values with zero).
+
+```{r removeSmallIntensities}
+lfqdata <- prolfqua::LFQData$new(adata, config)
+lfqdata$remove_small_intensities()
+lfqdata$factors()
+```
+
+
+```{r}
+hm <- lfqdata$get_Plotter()$heatmap()
+```
+
+```{r plotHeatmap}
+hm
+```
+
+
+```{r}
+tr <- lfqdata$get_Transformer()
+lfqTrans <- tr$log2()$lfq
+lfqTrans$get_Plotter()$intensity_distribution_density()
+lfqTrans$response()
+lfqTrans$rename_response("abundance")
+
+```
+
+
+# Model Fitting
+
+
+```{r specifyModel}
+
+formula_Batches <-
+  prolfqua::strategy_lm("abundance ~ f1. * f2. ")
+
+# specify model definition
+modelName  <- "Model"
+DEBUG <- TRUE
+
+Contrasts <- c("f1.l1vsf1.l2" = "f1.l1 - f1.l2",
+               "f2.l1vsf2.l2" = "f2.l1 - f2.l2",
+               "f1l1vsf1l2_gv_f2.l1" = "`f1.l1:f2.l1` - `f1.l2:f2.l1`",
+               "f1l1vsf1l2_gv_f2.l2" = "`f1.l1:f2.l2` - `f1.l2:f2.l2`",
+               "Interaction" = "`f1l1vsf1l2_gv_f2.l1` - `f1l1vsf1l2_gv_f2.l2`"
+               )
+
+```
+
+
+```{r buildModel}
+
+mod <- prolfqua::build_model(
+  lfqTrans,
+  formula_Batches)
+
+```
+
+
+
+```{r anovaPvaluePlots, fig.cap="p-value distributions for ANOVA analysis."}
+mod$anova_histogram(what = "FDR.Pr..F.")
+
+```
+
+## ANOVA
+
+Examine proteins with a significant interaction between the two factors treatment and batch.
+
+```{r anovaAnalysis}
+ANOVA <- mod$get_anova()
+ANOVA |> dplyr::filter(factor == "f1.:f2.") |> dplyr::arrange(FDR.Pr..F.) |> head(5)
+ANOVA$factor |> unique()
+protIntSig <- ANOVA |> dplyr::filter(factor == "f1.") |>
+  dplyr::filter(FDR.Pr..F. < 0.25)
+protInt <-  lfqTrans$get_copy()
+protInt$data <- protInt$data[protInt$data$protein_Id %in% protIntSig$protein_Id,]
+```
+
+
+```{r fig.with=15, fig.height=15, fig.cap="Proteins with FDR < 0.5 for condition batch interaction in ANOVA."}
+ggpubr::ggarrange(plotlist = protInt$get_Plotter()$boxplots()$boxplot)
+```
+
+# Compute contrasts
+
+```{r computeModeratedContrasts}
+contr <- prolfqua::ContrastsModerated$new(prolfqua::Contrasts$new(mod, Contrasts))
+#contr$get_contrasts_sides()
+contrdf <- contr$get_contrasts()
+```
+
+```{r}
+plotter <- contr$get_Plotter()
+plotter$volcano()
+plotter$ma_plot()
+
+```
+
+
+
+## Annalyse contrasts with missing data imputation
+
+```{r}
+lfqTrans$config$table$factorDepth <- 2
+#ContrastsSimpleImpute$debug("get_contrasts")
+contrSimple <- prolfqua::ContrastsSimpleImpute$new(lfqdata = lfqTrans, Contrasts)
+contrdfSimple <- contrSimple$get_contrasts()
+#na.omit(contrdfSimple)
+pl <- contrSimple$get_Plotter()
+pl$histogram_diff()
+pl$volcano()
+```
+
+
+## Merge nonimputed and imputed data.
+
+```{r}
+
+dim(contr$get_contrasts())
+dim(contrSimple$get_contrasts())
+
+mergedContrasts <- prolfqua::addContrastResults(prefer = contr, add = contrSimple)$merged
+cM <- mergedContrasts$get_Plotter()
+plot <- cM$volcano()
+plot$FDR
+```
+
+
+
+
diff --git a/vignettes/Modelling2Factors.Rmd b/vignettes/Modelling2Factors.Rmd
index 5521d7d1e..8d2d966a0 100644
--- a/vignettes/Modelling2Factors.Rmd
+++ b/vignettes/Modelling2Factors.Rmd
@@ -48,8 +48,6 @@ Contrasts <- c("Glucose - Ethanol" = "condition_Glucose - condition_Ethanol",
                "Interaction" = "`Glucose_vs_Ethanol_gv_p2370` - `Glucose_vs_Ethanol_gv_p2691`"
 )
 
-
-
 ```