From c735bed99ea61b404e6f83797a07f4db5b06c876 Mon Sep 17 00:00:00 2001 From: Lauren Hsu Date: Tue, 23 Jul 2024 17:55:26 -0400 Subject: [PATCH 1/5] Add monster wrapper function that takes as input 2 premade gene regulatory networks (eg from panda) --- R/MONSTER.R | 70 +++++++++++++++++++++++++++++++++++++++- vignettes/MONSTER.Rmd | 75 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 130 insertions(+), 15 deletions(-) diff --git a/R/MONSTER.R b/R/MONSTER.R index 5d5c5514..4e1be2d7 100644 --- a/R/MONSTER.R +++ b/R/MONSTER.R @@ -118,7 +118,8 @@ monsterPrintMonsterAnalysis <- function(x, ...){ #' design=c(rep(0,nGenes),rep(1, nGenes)) #' monsterRes <- monster(expr, design, motif=NA, nullPerms=10, numMaxCores=1, mode='regNet') #' } - +#' # alternatively, if a gene regulatory network has already been estimated, +#' # see the domonster function for quick start monster <- function(expr, design, motif, @@ -1136,3 +1137,70 @@ NULL #' @references Glass K, Huttenhower C, Quackenbush J, Yuan GC. Passing Messages Between Biological Networks to Refine Predicted Interactions. PLoS One. 2013 May 31;8(5):e64832. #' NULL + + +#' MONSTER quick-start with pre-made regulatory networks +#' +#' This function is a wrapper to simplify usage of the \code{monster} function in +#' the case where the pair of regulatory networks to be contrasted have already +#' been estimated, either as \code{panda} objects, or represented as an +#' adjacency matrix with regulators in rows and genes in columns. +#' +#' @param exp_graph matrix or PANDA object (generated by \code{netzooR::panda}); if matrix, should be the adjacency matrix for the network with regulators in rows and genes in columns, both named. This should be the network for the experimental (case) group. +#' @param control_graph matrix or PANDA object (generated by \code{netzooR::panda}); if matrix, should be the adjacency matrix for the network with regulators in rows and genes in columns, both named. This should be the network for the control (reference) group. +#' @param nullPerms numeric; defaults to 1000. Number of null permutations to perform. See \code{monster} for more details. +#' @param numMaxCores numeric; defaults to 3. Maximum number of cores to use; will be the minimum of this number and the actual available cores. See \code{monster} for more details. +#' @param ... other arguments for \code{monster} may be passed. +#' +#' @return \code{monster} object +#' @export +#' +#' @examples +#' +#' Generating PANDA networks for demonstration: +#' # For the purposes of this example, first partition the pandaToyData samples, then perform panda: +#' pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) +#' pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) +#' +#' # function takes both panda objects and matrices, or a mixture +#' monster_res1 <- domonster(pandaResult_exp, pandaResult_control) +#' monster_res2 <- domonster(pandaResult_exp@regNet, pandaResult_control@regNet) +#' monster_res3 <- domonster(pandaResult_exp@regNet, pandaResult_control) +domonster <- function(exp_graph, control_graph, nullPerms = 1000, numMaxCores = 3, ...){ + if('panda' %in% class(exp_graph)){ + exp_graph <- exp_graph@regNet + } + if('panda' %in% class(control_graph)){ + control_graph <- control_graph@regNet + } + + if(!identical(colnames(control_graph), colnames(exp_graph))){ + genes_keep <- intersect(colnames(control_graph), colnames(exp_graph)) + + exp_graph <- exp_graph[,genes_keep] + control_graph <- control_graph[,genes_keep] + + cat('\nNote: column names are not identical in the input; taking the intersection of them:\n') + cat(paste0(length(genes_keep), ' genes included \n')) + } + if(!identical(rownames(control_graph), rownames(exp_graph))){ + reg_keep <- intersect(rownames(control_graph), rownames(exp_graph)) + + exp_graph <- exp_graph[reg_keep,] + control_graph <- control_graph[reg_keep,] + + cat('\Note: row names are not identical in the input; taking the intersection of them:\n') + cat(paste0(length(reg_keep), ' regulators included \n')) + } + + combdf <- as.data.frame(cbind(control_graph, exp_graph)) + combdes <- c(rep(0, ncol(control_graph)), rep(1, ncol(exp_graph))) + res <- monster(expr = combdf, + design = combdes, + motif = NA, + mode = 'regNet', + nullPerms = nullPerms, + numMaxCores = numMaxCores, + ...) + return(res) +} diff --git a/vignettes/MONSTER.Rmd b/vignettes/MONSTER.Rmd index c3e55352..ec034753 100644 --- a/vignettes/MONSTER.Rmd +++ b/vignettes/MONSTER.Rmd @@ -29,7 +29,7 @@ MONSTER takes in sequence motif data linking transcription factors (TFs) to gene Install and load netZooR package ```{r,eval = FALSE} -# install.packages("devtools") +# install.packages("devtools") library(devtools) # install netZooR pkg with vignettes, otherwise remove the "build_vignettes = TRUE" argument. devtools::install_github("netZoo/netZooR", build_vignettes = TRUE) @@ -39,11 +39,14 @@ devtools::install_github("netZoo/netZooR", build_vignettes = TRUE) library(netZooR) ``` -## Input files + +## Full MONSTER tutorial in build network mode + +### Input files In this demo, we use the included Yeast dataset, containing three separate Yeast datasets, and a sequence motif object. The `yeast` dataset includes three separate experimental designs each involving microarray assays of 2555 genes of yeast (Saccharomyces cerevisiae). `yeast$exp.ko` is a set of 106 gene expression samples of following a number of gene knockouts. `yeast$exp.cc` is a set of 50 gene expression samples taken sets of two across 25 timepoints spanning the cellular cycle. `yeast$exp.sr` is a set of 173 gene expression samples collected under conditions of stress such as heat shock. Each expression dataset has been normalized and scaled. -In this tutorial, we will run `monster` to identify suspected TF drivers of the change from the early cell cycle to late cell cycle. +In this tutorial, we will run `monster` to identify suspected TF drivers of the change from the early cell cycle to late cell cycle. First, we load the data included in the `netZooR` package @@ -51,7 +54,7 @@ First, we load the data included in the `netZooR` package data(yeast) ``` -Next, we create our design vector indicating to which group each sample belongs. This vector must contain only 0's and 1's (NAs allowed). +Next, we create our design vector indicating to which group each sample belongs. This vector must contain only 0's and 1's (NAs allowed). In this example we are running the analysis on the first 10 timepoints compared to the last 10 timepoints, ignoring the middle 5 for the purposes of simplicity in this tutorial. Each timepoint contains two samples. @@ -59,11 +62,11 @@ In this example we are running the analysis on the first 10 timepoints compared design <- c(rep(0,20),rep(NA,10),rep(1,20)) ``` -The main method in MONSTER is the `monster` function. This function has three required arguments, +The main method in MONSTER is the `monster` function. This function has three required arguments, -* A gene expression matrix, `yeast$exp.cc` +* A gene expression matrix, `yeast$exp.cc` * A motif mapping data.frame, `yeast$motif` -* A design integer vector, `design` +* A design integer vector, `design` The gene expression argument may be a `matrix`, a `data.frame` or an `ExpressionSet`. In this example, `yeast$exp.cc` is a `data.frame` consisting of 2555 rows and 50 columns. @@ -74,7 +77,7 @@ yeast$exp.cc[1:5,1:5] The motif mapping data.frame tells the MONSTER algorithm which genes contain likely transcription factor binding sites in the vicinity of their promoter. This serves as the regulatory prior and informs the initial network inference method by supplying a partial list of TF targeting. -This data.frame contains 3 columns, where each row is a specific edge in the prior network. +This data.frame contains 3 columns, where each row is a specific edge in the prior network. * Column 1 specifies the transcription factor for the edge. * Column 2 specifies the targeted gene for the edge @@ -92,7 +95,7 @@ MONSTER tests the statistical significance of its results by permuting the sampl Monster is optimized to run on multiple cores, if available. We can specify the maximum number of cores which are available to run the algorithm. If `numMaxCores` unspecified, MONSTER will check available resources and run on all but four of the available cores. -## Running MONSTER +### Running MONSTER ```{r, message=F} yeast$exp.cc[is.na(yeast$exp.cc)] <- mean(as.matrix(yeast$exp.cc),na.rm=TRUE) @@ -124,7 +127,7 @@ We can view the $dTFI$ with the generic `plot` function monsterPlotMonsterAnalysis(monsterRes) ``` -In this plot, we have the $dTFI$ for each transcription factor along the x-axis. The observed values are shown in red and the null values are shown in blue. +In this plot, we have the $dTFI$ for each transcription factor along the x-axis. The observed values are shown in red and the null values are shown in blue. Due to the complex nature of the structure of gene regulatory networks, it is not uncommon to see transcription factors which exhibit a high degree of transitional change, but which is not statistically significant due to the high variability of that particular TF (e.g. YDR463W). Conversely, some TFs show weak changes, but those changes are large compared to the changes observed in null transitions (e.g. YKL062W). Ideally, we are most interested in TFs which demonstrate large changes in targetting pattern which is found to be strongly significant (e.g. YJL056C). @@ -138,7 +141,7 @@ Our top hit here is YDL056W, which reassuringly is established in the literature *** -The dTFI plot focuses primarily on individual transcription factors which have systematically changed their targetting patterns between groups. To dive further into the mechanisms, we may be specifically interested in which TFs are acquiring the targetting signatures of which other TFs. We can visualize the transition matrix in a number of ways using MONSTER. +The dTFI plot focuses primarily on individual transcription factors which have systematically changed their targetting patterns between groups. To dive further into the mechanisms, we may be specifically interested in which TFs are acquiring the targetting signatures of which other TFs. We can visualize the transition matrix in a number of ways using MONSTER. First, using the package `gplots`, we can simply plot the transition matrix. The `heatmap.2` function will show the $m\times m$ transition matrix in the form of a square heatmap with $m$ being the number of transcription factors. Intuitively, this is the operator, $\textbf{T}$, on which we transform gene regulatory network $\textbf{A}$ (The first 10 timepoints in the Yeast Cell Cycle) to network $\textbf{B}$ (The last 10 timepoints in the Yeast Cell Cycle) via the equation $$\textbf{B}=\textbf{AT} + \textbf{E}$$ @@ -148,9 +151,9 @@ where $\textbf{E}$ is the $p\times m$ error matrix which we are minimizing. ```{r, fig.width=10, fig.height=8, out.width = 750, out.height = 600, message=FALSE} library(gplots) heatmap.2(slot(monsterRes, 'tm'), col = "bluered", -density.info="none", -trace="none", -dendrogram='none', +density.info="none", +trace="none", +dendrogram='none', Rowv=FALSE, Colv=FALSE) ``` @@ -185,3 +188,47 @@ Furthermore, we are often interested in correlated targetting pattern sharing. ``` The above plot can also be achieved using the included `MONSTER` function, `monster.transitionPCAPlot(monsterRes)`. + +## MONSTER quick-start with pre-made regulatory networks + +For analyses where MONSTER is to be performed after a regulatory network has already been estimated (e.g., with PANDA), the `domonster` function can be used to easily prepare the input networks for running `monster`. + +`domonster` takes as input 2 networks (`exp_graph` and `control_graph`), either as PANDA objects or adjacency matrices (regulators in rows; genes in columns). + +- `exp_graph` is the graph generated from experimental or case samples +- `control_graph` is the graph generated from control or reference samples + +The transition matrix $M_{\mathrm{transition}}$ that the MONSTER algorithm estimates will be that for: + +$$G_{\mathrm{control}} M_{\mathrm{transition}} = G_{\mathrm{exp}}$$ + +where $G_{\mathrm{control}}$ and $G_{\mathrm{exp}}$ are the control and experimental graphs, respectively. + +To demonstrate the code set-up with a toy example, we start by creating toy datasets using `pandaToyData`. For the purposes of demonstration, we partition the samples into experimental and control: + +```{r, message = F} +# Generating PANDA networks from partitioned toy data + +pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) +pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) +``` + +Then, these outputs generated by PANDA can be used as input either in PANDA format directly, or with the network. Any of the below code options will work and be equivalent to each other. In addition to PANDA networks, any regulatory network graph in adjacency matrix format (regulators in rows and genes in columns) can be used. + +```{r, message = F} +monster_res1 <- domonster(exp_graph = pandaResult_exp, + control_graph = pandaResult_control, + nullPerms = 10) + +monster_res2 <- domonster(exp_graph = pandaResult_exp@regNet, + control_graph = pandaResult_control@regNet, + nullPerms = 10) + +monster_res3 <- domonster(exp_graph = pandaResult_exp@regNet, + control_graph = pandaResult_control, + nullPerms = 10) +``` + +For this code example, we compute only 10 null permutations. However, the function defaults to 1000, as would likely be used in an actual analysis to ensure sufficient null observations to estimate null distributions and compute empirical p-values. + +The results can then be further analyzed and visualized as described in the vignette above. From 55b297bf7b3f38d2a62372ec4a73203ab350fa86 Mon Sep 17 00:00:00 2001 From: Lauren Hsu Date: Wed, 24 Jul 2024 22:59:51 -0400 Subject: [PATCH 2/5] - updates post devtools::document() - fixed typo in domonster error message --- NAMESPACE | 1 + R/MONSTER.R | 2 +- man/domonster.Rd | 40 ++++++++++++++++++++++++++++++++++++++++ man/monster.Rd | 5 ++++- 4 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 man/domonster.Rd diff --git a/NAMESPACE b/NAMESPACE index a6f2033b..536ef671 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(craneBipartite) export(craneUnipartite) export(createCondorObject) export(createPandaStyle) +export(domonster) export(downloadGTEx) export(dragon) export(el2adj) diff --git a/R/MONSTER.R b/R/MONSTER.R index 4e1be2d7..2a962f69 100644 --- a/R/MONSTER.R +++ b/R/MONSTER.R @@ -1189,7 +1189,7 @@ domonster <- function(exp_graph, control_graph, nullPerms = 1000, numMaxCores = exp_graph <- exp_graph[reg_keep,] control_graph <- control_graph[reg_keep,] - cat('\Note: row names are not identical in the input; taking the intersection of them:\n') + cat('\nNote: row names are not identical in the input; taking the intersection of them:\n') cat(paste0(length(reg_keep), ' regulators included \n')) } diff --git a/man/domonster.Rd b/man/domonster.Rd new file mode 100644 index 00000000..5a63f6cf --- /dev/null +++ b/man/domonster.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MONSTER.R +\name{domonster} +\alias{domonster} +\title{MONSTER quick-start with pre-made regulatory networks} +\usage{ +domonster(exp_graph, control_graph, nullPerms = 1000, numMaxCores = 3, ...) +} +\arguments{ +\item{exp_graph}{matrix or PANDA object (generated by \code{netzooR::panda}); if matrix, should be the adjacency matrix for the network with regulators in rows and genes in columns, both named. This should be the network for the experimental (case) group.} + +\item{control_graph}{matrix or PANDA object (generated by \code{netzooR::panda}); if matrix, should be the adjacency matrix for the network with regulators in rows and genes in columns, both named. This should be the network for the control (reference) group.} + +\item{nullPerms}{numeric; defaults to 1000. Number of null permutations to perform. See \code{monster} for more details.} + +\item{numMaxCores}{numeric; defaults to 3. Maximum number of cores to use; will be the minimum of this number and the actual available cores. See \code{monster} for more details.} + +\item{...}{other arguments for \code{monster} may be passed.} +} +\value{ +\code{monster} object +} +\description{ +This function is a wrapper to simplify usage of the \code{monster} function in +the case where the pair of regulatory networks to be contrasted have already +been estimated, either as \code{panda} objects, or represented as an +adjacency matrix with regulators in rows and genes in columns. +} +\examples{ + +Generating PANDA networks for demonstration: +# For the purposes of this example, first partition the pandaToyData samples, then perform panda: +pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) +pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) + +# function takes both panda objects and matrices, or a mixture +monster_res1 <- domonster(pandaResult_exp, pandaResult_control) +monster_res2 <- domonster(pandaResult_exp@regNet, pandaResult_control@regNet) +monster_res3 <- domonster(pandaResult_exp@regNet, pandaResult_control) +} diff --git a/man/monster.Rd b/man/monster.Rd index 4b48bf49..53255d11 100644 --- a/man/monster.Rd +++ b/man/monster.Rd @@ -45,7 +45,8 @@ to give to indirect compared to direct evidence. The default is 0.5 to give an equal weight to direct and indirect evidence.} \item{mode}{A parameter telling whether to build the regulatory networks ('buildNet') or to use provided regulatory networks -('regNet'). If set to 'regNet', then the parameters motif, ni_method, ni.coefficient.cutoff, and alphaw will be set to NA.} +('regNet'). If set to 'regNet', then the parameters motif, ni_method, ni.coefficient.cutoff, and alphaw will be set to NA. Gene regulatory +networks are supplied in the 'expr' variable as a TF-by-Gene matrix, by concatenating the TF-by-Gene matrices of case and control, expr has size nTFs x 2nGenes.} } \value{ An object of class "monsterAnalysis" containing results @@ -85,4 +86,6 @@ expr = as.data.frame(cbind(control,case)) design=c(rep(0,nGenes),rep(1, nGenes)) monsterRes <- monster(expr, design, motif=NA, nullPerms=10, numMaxCores=1, mode='regNet') } +# alternatively, if a gene regulatory network has already been estimated, +# see the domonster function for quick start } From cd264eef8182587723eda993dc02523abac561d9 Mon Sep 17 00:00:00 2001 From: Lauren Hsu Date: Wed, 24 Jul 2024 23:12:54 -0400 Subject: [PATCH 3/5] update math notation to match existing vignette content --- vignettes/MONSTER.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/MONSTER.Rmd b/vignettes/MONSTER.Rmd index ec034753..2df55bc7 100644 --- a/vignettes/MONSTER.Rmd +++ b/vignettes/MONSTER.Rmd @@ -198,9 +198,9 @@ For analyses where MONSTER is to be performed after a regulatory network has alr - `exp_graph` is the graph generated from experimental or case samples - `control_graph` is the graph generated from control or reference samples -The transition matrix $M_{\mathrm{transition}}$ that the MONSTER algorithm estimates will be that for: +The transition matrix $\hat{\Tau}$ that the MONSTER algorithm estimates will be that for: -$$G_{\mathrm{control}} M_{\mathrm{transition}} = G_{\mathrm{exp}}$$ +$$G_{\mathrm{control}} \hat{\tau} = G_{\mathrm{exp}}$$ where $G_{\mathrm{control}}$ and $G_{\mathrm{exp}}$ are the control and experimental graphs, respectively. From e90b910327d971b49bb63fa3edc088cbd110ec46 Mon Sep 17 00:00:00 2001 From: Lauren Hsu Date: Wed, 24 Jul 2024 23:26:01 -0400 Subject: [PATCH 4/5] typo in examples --- R/MONSTER.R | 2 +- man/domonster.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MONSTER.R b/R/MONSTER.R index 2a962f69..8cdbe9a3 100644 --- a/R/MONSTER.R +++ b/R/MONSTER.R @@ -1157,7 +1157,7 @@ NULL #' #' @examples #' -#' Generating PANDA networks for demonstration: +#' # Generating PANDA networks for demonstration: #' # For the purposes of this example, first partition the pandaToyData samples, then perform panda: #' pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) #' pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) diff --git a/man/domonster.Rd b/man/domonster.Rd index 5a63f6cf..d5466cea 100644 --- a/man/domonster.Rd +++ b/man/domonster.Rd @@ -28,7 +28,7 @@ adjacency matrix with regulators in rows and genes in columns. } \examples{ -Generating PANDA networks for demonstration: +# Generating PANDA networks for demonstration: # For the purposes of this example, first partition the pandaToyData samples, then perform panda: pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) From dc8676e20ec6d4162ca9c7c380c253188704fe58 Mon Sep 17 00:00:00 2001 From: Lauren Hsu Date: Wed, 24 Jul 2024 23:26:01 -0400 Subject: [PATCH 5/5] set examples to donttest --- R/MONSTER.R | 5 +++-- man/domonster.Rd | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/MONSTER.R b/R/MONSTER.R index 2a962f69..58828704 100644 --- a/R/MONSTER.R +++ b/R/MONSTER.R @@ -1156,8 +1156,8 @@ NULL #' @export #' #' @examples -#' -#' Generating PANDA networks for demonstration: +#' \donttest{ +#' # Generating PANDA networks for demonstration: #' # For the purposes of this example, first partition the pandaToyData samples, then perform panda: #' pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) #' pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) @@ -1166,6 +1166,7 @@ NULL #' monster_res1 <- domonster(pandaResult_exp, pandaResult_control) #' monster_res2 <- domonster(pandaResult_exp@regNet, pandaResult_control@regNet) #' monster_res3 <- domonster(pandaResult_exp@regNet, pandaResult_control) +#' } domonster <- function(exp_graph, control_graph, nullPerms = 1000, numMaxCores = 3, ...){ if('panda' %in% class(exp_graph)){ exp_graph <- exp_graph@regNet diff --git a/man/domonster.Rd b/man/domonster.Rd index 5a63f6cf..96886428 100644 --- a/man/domonster.Rd +++ b/man/domonster.Rd @@ -27,8 +27,8 @@ been estimated, either as \code{panda} objects, or represented as an adjacency matrix with regulators in rows and genes in columns. } \examples{ - -Generating PANDA networks for demonstration: +\donttest{ +# Generating PANDA networks for demonstration: # For the purposes of this example, first partition the pandaToyData samples, then perform panda: pandaResult_exp <- panda(pandaToyData$motif, pandaToyData$expression[,1:25], pandaToyData$ppi) pandaResult_control <- panda(pandaToyData$motif, pandaToyData$expression[,26:50], pandaToyData$ppi) @@ -38,3 +38,4 @@ monster_res1 <- domonster(pandaResult_exp, pandaResult_control) monster_res2 <- domonster(pandaResult_exp@regNet, pandaResult_control@regNet) monster_res3 <- domonster(pandaResult_exp@regNet, pandaResult_control) } +}