From fe985c43b345b7d144b369271696e44d10dfbf44 Mon Sep 17 00:00:00 2001 From: Witold Wolski Date: Wed, 22 Jun 2022 16:12:25 +0200 Subject: [PATCH] vignettte with interactions. --- vignettes/ModelWithInteractions.Rmd | 261 ++++++++++++++++++++++++++++ vignettes/Modelling2Factors.Rmd | 2 - 2 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 vignettes/ModelWithInteractions.Rmd diff --git a/vignettes/ModelWithInteractions.Rmd b/vignettes/ModelWithInteractions.Rmd new file mode 100644 index 000000000..7dbeb1f73 --- /dev/null +++ b/vignettes/ModelWithInteractions.Rmd @@ -0,0 +1,261 @@ +--- +title: "Modelling with Interactions." +author: "Witold Wolski" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + html_document: default + pdf_document: default +vignette: | + %\VignetteIndexEntry{Modelling with Interactions.} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) +``` + + + + + + +```{r} +a <- c(a = 3,b = 4.5, c = 6, d = 7.5, e = 9) +f1 <- list(l1 = 3,l2 = 4.5) +f2 <- list(l1 = 0, l2 = 3) + +a = f1$l1 + f2$l1 +b = f1$l2 + f2$l1 +c = f1$l1 + f2$l2 +d = f1$l2 + f2$l2 +c(a,b,c,d) + +``` + +```{r LoadDataAndConfigure} +datadir <- file.path(find.package("prolfqua") , "samples/maxquant_txt") +inputMQfile <- file.path(datadir, "tiny2.zip") +inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx") +startdata <- prolfqua::tidyMQ_ProteinGroups(inputMQfile) +``` + +Read the sample annotation. The sample annotation must contain the `raw.file` name and the explanatory variables of your experiment, e.g. treatment, timepoint, genetic background, or other factors which you would like to check for confounding. + + +```{r readAnnotation} +annotation <- readxl::read_xlsx(inputAnnotation) +head(annotation) +annotation <- annotation |> dplyr::filter(sample != "e") +annotation <- annotation |> + dplyr::mutate(f1 = dplyr::case_when(sample %in% c("a","c") ~ "l1", TRUE ~ "l2"), + f2 = dplyr::case_when(sample %in% c("a","b") ~ "l1", TRUE ~ "l2")) |> + dplyr::arrange(sample) +``` + +Merge the annotation with quantitative data using `inner_join` joining by `raw.file`. + +```{r addAnnotationToData} +startdata <- dplyr::inner_join(annotation, startdata, by = "raw.file") +``` + +We remove all proteins identified only by a single peptide. + + +```{r filterForAtLeastTwoPeptides} +startdata <- dplyr::filter(startdata, nr.peptides > 1) +``` + + +Then you need to _tell_ `prolfqua` which columns in the data frame contain what information. You do it using the `AnalysisTableAnnotation` class. + +```{r setupConfigs} +atable <- prolfqua::AnalysisTableAnnotation$new() +``` + +The `AnalysisTableAnnotation` has the following fields that need to be populated: + +- fileName +- hierarchy +- factors +- workingIntensity + +, and we will discuss in more detail next. + + +The `fileName` is the column with the raw file names, however for labelled TMT experiments, it can be used to hold the name of the TMT channel. + +```{r specifyRawFile} +atable$fileName = "raw.file" +``` + +The `hierarchy` field describes the structure of the MS data e.g, + +- protein +- peptides +- modified peptides +- precursor + +In case of the MQ proteinGroups file we have data on protein level. + +```{r specifyProteinID} +atable$hierarchy[["protein_Id"]] <- c("proteinID") + +``` + +In addition you need to describe the `factors` of the analysis, i.e, the column containing the explanatory variables. + +```{r specifyFactors} +atable$factors[["f1."]] = "f1" +atable$factors[["f2."]] = "f2" + +``` + +We also need to specify the column containing the protein abundances. + +```{r specifyIntensity} +atable$setWorkIntensity("mq.protein.intensity") +``` + +Finally we create the `AnalysisConfiguration` which needs the `AnalysisTableAnnotation` we just created and the `AnalysisParameters`. + +```{r createAnalysisConfig} +config <- prolfqua::AnalysisConfiguration$new(atable) +adata <- prolfqua::setup_analysis(startdata, config) + +``` + +Create the `LFQData` class instance and remove zeros from data (MaxQuant encodes missing values with zero). + +```{r removeSmallIntensities} +lfqdata <- prolfqua::LFQData$new(adata, config) +lfqdata$remove_small_intensities() +lfqdata$factors() +``` + + +```{r} +hm <- lfqdata$get_Plotter()$heatmap() +``` + +```{r plotHeatmap} +hm +``` + + +```{r} +tr <- lfqdata$get_Transformer() +lfqTrans <- tr$log2()$lfq +lfqTrans$get_Plotter()$intensity_distribution_density() +lfqTrans$response() +lfqTrans$rename_response("abundance") + +``` + + +# Model Fitting + + +```{r specifyModel} + +formula_Batches <- + prolfqua::strategy_lm("abundance ~ f1. * f2. ") + +# specify model definition +modelName <- "Model" +DEBUG <- TRUE + +Contrasts <- c("f1.l1vsf1.l2" = "f1.l1 - f1.l2", + "f2.l1vsf2.l2" = "f2.l1 - f2.l2", + "f1l1vsf1l2_gv_f2.l1" = "`f1.l1:f2.l1` - `f1.l2:f2.l1`", + "f1l1vsf1l2_gv_f2.l2" = "`f1.l1:f2.l2` - `f1.l2:f2.l2`", + "Interaction" = "`f1l1vsf1l2_gv_f2.l1` - `f1l1vsf1l2_gv_f2.l2`" + ) + +``` + + +```{r buildModel} + +mod <- prolfqua::build_model( + lfqTrans, + formula_Batches) + +``` + + + +```{r anovaPvaluePlots, fig.cap="p-value distributions for ANOVA analysis."} +mod$anova_histogram(what = "FDR.Pr..F.") + +``` + +## ANOVA + +Examine proteins with a significant interaction between the two factors treatment and batch. + +```{r anovaAnalysis} +ANOVA <- mod$get_anova() +ANOVA |> dplyr::filter(factor == "f1.:f2.") |> dplyr::arrange(FDR.Pr..F.) |> head(5) +ANOVA$factor |> unique() +protIntSig <- ANOVA |> dplyr::filter(factor == "f1.") |> + dplyr::filter(FDR.Pr..F. < 0.25) +protInt <- lfqTrans$get_copy() +protInt$data <- protInt$data[protInt$data$protein_Id %in% protIntSig$protein_Id,] +``` + + +```{r fig.with=15, fig.height=15, fig.cap="Proteins with FDR < 0.5 for condition batch interaction in ANOVA."} +ggpubr::ggarrange(plotlist = protInt$get_Plotter()$boxplots()$boxplot) +``` + +# Compute contrasts + +```{r computeModeratedContrasts} +contr <- prolfqua::ContrastsModerated$new(prolfqua::Contrasts$new(mod, Contrasts)) +#contr$get_contrasts_sides() +contrdf <- contr$get_contrasts() +``` + +```{r} +plotter <- contr$get_Plotter() +plotter$volcano() +plotter$ma_plot() + +``` + + + +## Annalyse contrasts with missing data imputation + +```{r} +lfqTrans$config$table$factorDepth <- 2 +#ContrastsSimpleImpute$debug("get_contrasts") +contrSimple <- prolfqua::ContrastsSimpleImpute$new(lfqdata = lfqTrans, Contrasts) +contrdfSimple <- contrSimple$get_contrasts() +#na.omit(contrdfSimple) +pl <- contrSimple$get_Plotter() +pl$histogram_diff() +pl$volcano() +``` + + +## Merge nonimputed and imputed data. + +```{r} + +dim(contr$get_contrasts()) +dim(contrSimple$get_contrasts()) + +mergedContrasts <- prolfqua::addContrastResults(prefer = contr, add = contrSimple)$merged +cM <- mergedContrasts$get_Plotter() +plot <- cM$volcano() +plot$FDR +``` + + + + diff --git a/vignettes/Modelling2Factors.Rmd b/vignettes/Modelling2Factors.Rmd index 5521d7d1e..8d2d966a0 100644 --- a/vignettes/Modelling2Factors.Rmd +++ b/vignettes/Modelling2Factors.Rmd @@ -48,8 +48,6 @@ Contrasts <- c("Glucose - Ethanol" = "condition_Glucose - condition_Ethanol", "Interaction" = "`Glucose_vs_Ethanol_gv_p2370` - `Glucose_vs_Ethanol_gv_p2691`" ) - - ```