diff --git a/DESCRIPTION b/DESCRIPTION index cf432035..81b25bac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: CoGAPS -Version: 3.23.2 +Version: 3.24.0 Date: 2024-03-22 Title: Coordinated Gene Activity in Pattern Sets Author: Jeanette Johnson, Ashley Tsang, Jacob Mitchell, Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey, @@ -33,12 +33,10 @@ Imports: tools, utils, rhdf5, - biomaRt, dplyr, fgsea, forcats, - ggplot2, - msigdbr + ggplot2 Suggests: testthat, knitr, diff --git a/NAMESPACE b/NAMESPACE index 3b2653b7..24cd0ad2 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,7 +20,7 @@ export(getFeatureLoadings) export(getMeanChiSq) export(getOriginalParameters) export(getParam) -export(getPatternHallmarks) +export(getPatternGeneSet) export(getPatternMatrix) export(getRetinaSubset) export(getSampleFactors) @@ -28,7 +28,7 @@ export(getSubsets) export(getUnmatchedPatterns) export(getVersion) export(patternMarkers) -export(plotPatternHallmarks) +export(plotPatternGeneSet) export(plotPatternMarkers) export(plotResiduals) export(reconstructGene) @@ -42,7 +42,6 @@ exportClasses(CogapsParams) exportClasses(CogapsResult) import(dplyr) import(fgsea) -import(msigdbr) importClassesFrom(S4Vectors,Annotated) importClassesFrom(S4Vectors,character_OR_NULL) importClassesFrom(SingleCellExperiment,LinearEmbeddingMatrix) @@ -52,13 +51,10 @@ importFrom(RColorBrewer,brewer.pal) importFrom(Rcpp,evalCpp) importFrom(S4Vectors,make_zero_col_DFrame) importFrom(SummarizedExperiment,assay) -importFrom(biomaRt,getBM) -importFrom(biomaRt,useEnsembl) importFrom(cluster,agnes) importFrom(dplyr,relocate) importFrom(forcats,fct_reorder) importFrom(ggplot2,aes) -importFrom(ggplot2,aes_string) importFrom(ggplot2,coord_flip) importFrom(ggplot2,geom_col) importFrom(ggplot2,geom_hline) diff --git a/R/class-CogapsResult.R b/R/class-CogapsResult.R index 9aa26e5a..da84da4f 100755 --- a/R/class-CogapsResult.R +++ b/R/class-CogapsResult.R @@ -154,25 +154,36 @@ setGeneric("getPatternMatrix", function(object) setGeneric("getMeanChiSq", function(object) {standardGeneric("getMeanChiSq")}) -#' generate statistics associating patterns with MSigDB hallmark gene sets +#' generate statistics associating patterns with gene sets #' @export #' @docType methods -#' @rdname getPatternHallmarks-methods -#' @aliases getPatternHallmarks +#' @rdname getPatternGeneSet-methods +#' @aliases getPatternGeneSet #' @param object an object of type CogapsResult -#' @return dataframe of hallmark info -setGeneric("getPatternHallmarks", function(object) standardGeneric("getPatternHallmarks")) +#' @param gene.sets a list of gene sets to test. List names should be the names of the gene sets +#' @param method enrichment or overrepresentation. Conducts a test for gene set enrichment using {fgsea::gsea} ranking features by pattern amplitude or a test for gene set overrepresentation in pattern markers using {fgsea::fora}, respectively. +#' @param ... additional parameters passed to {patternMarkers} if using overrepresentation method +#' @return list of dataframes containing gene set enrichment or gene set overrepresentation statistics +#' @examples +#' data(GIST) +#' gs.test <- list( +#' "gs1" = c("Hs.2", "Hs.4", "Hs.36", "Hs.96", "Hs.202"), +#' "gs2" = c("Hs.699463", "Hs.699288", "Hs.699280", "Hs.699154", "Hs.697294") +#' ) +#' getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "enrichment") +#' getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "overrepresentation") +setGeneric("getPatternGeneSet", function(object, gene.sets, method = c("enrichment", "overrepresentation"), ...) {standardGeneric("getPatternGeneSet")}) #' generate a barchart of most significant hallmark sets for a pattern #' @export #' @docType methods -#' @rdname plotPatternHallmarks-methods -#' @aliases plotPatternHallmarks -#' @param object an object of type CogapsResult -#' @param patternhallmarks output from getPatternHallmarks +#' @rdname plotPatternGeneSet-methods +#' @aliases plotPatternGeneSet +#' @param patterngeneset output from getPatternGeneSet #' @param whichpattern which pattern to generate bar chart for +#' @param padj_threshold maximum adjusted p-value of gene sets rendered on the resulting plot #' @return image object of barchart -setGeneric("plotPatternHallmarks", function(object, patternhallmarks, whichpattern=1) standardGeneric("plotPatternHallmarks")) +setGeneric("plotPatternGeneSet", function(patterngeneset, whichpattern=1, padj_threshold = 0.05) standardGeneric("plotPatternGeneSet")) #' return version number used to generate this result #' @export diff --git a/R/methods-CogapsResult.R b/R/methods-CogapsResult.R index 5ae93687..7aedab6b 100755 --- a/R/methods-CogapsResult.R +++ b/R/methods-CogapsResult.R @@ -292,101 +292,99 @@ unitVector <- function(n, length) return(vec) } -#' @rdname getPatternHallmarks-methods -#' @import msigdbr +#' @rdname getPatternGeneSet-methods #' @import dplyr -#' @importFrom biomaRt useEnsembl getBM #' @import fgsea #' @importFrom forcats fct_reorder -#' @aliases getPatternHallmarks -setMethod("getPatternHallmarks", signature(object="CogapsResult"), -function(object) +#' @aliases getPatternGeneSet +setMethod("getPatternGeneSet", signature(object="CogapsResult", gene.sets="list", method = "character"), +function(object, gene.sets, method = c("enrichment", "overrepresentation"), ...) { - # List of MsigDB hallmarks and the genes in each set - hallmark_df <- msigdbr(species = "Homo sapiens", category = "H") - hallmark_list <- hallmark_df %>% split(x = .$gene_symbol, f = .$gs_name) - - # obtain universe of all human hgnc gene symbols from ensembl - mart <- useEnsembl('genes', dataset = "hsapiens_gene_ensembl") #GRCh38 - Hs_genes <- getBM("hgnc_symbol", mart = mart) - - patternMarkerResults <- patternMarkers(object, threshold = "cut", lp = NA, axis = 1) - names(patternMarkerResults$PatternMarkers) <- colnames(patternMarkerResults$PatternMarkerRanks) - - # List of each gene as a pattern marker - PMlist <- patternMarkerResults$PatternMarkers - - d <- list() - for (pattern in names(PMlist)) - { - # Over-representation analysis of pattern markers ########################### - suppressWarnings( - result <- fora(pathways = hallmark_list, - genes = PMlist[[pattern]], - universe = Hs_genes$hgnc_symbol, - maxSize=2038) + method <- match.arg(method) + A <- object@featureLoadings + patternNames <- colnames(A) + features <- rownames(A) + # enrichment method + if(method == "enrichment") { + d <- lapply( + patternNames, FUN = function(p) { + amp <- A[,p] + names(amp) <- features + result <- fgsea::fgsea(pathways = gene.sets, stats = amp, scoreType = "pos") + result$leadingEdge <- vapply(result$leadingEdge, FUN = toString, FUN.VALUE = character(1)) + result$neg.log.padj <- (-10) * log10(result$padj) + result$gene.set <- names(gene.sets) + result <- dplyr::mutate(result,gene.set=forcats::fct_reorder(gene.set, - padj)) + return(result) + } ) + } + + # overrepresentation method + if(method == "overrepresentation") { + patternMarkerResults <- patternMarkers(object, ...) + names(patternMarkerResults$PatternMarkers) <- patternNames - # Append ratio of overlap/# of genes in hallmark (k/K) - result$"k/K" <- result$overlap/result$size - - # Append log-transformed HB-adjusted q value (-10 * log(adjusted p value)) - result$"neg.log.q" <- (-10) * log10(result$padj) - - # Append shortened version of hallmark's name without "HALLMARK_" - result$MsigDB_Hallmark <- substr(result$pathway, 10, nchar(result$pathway)) - - # Reorder dataframe by ascending adjusted p value - result <- mutate(result, MsigDB_Hallmark=fct_reorder(MsigDB_Hallmark, - padj)) - - d[[length(d)+1]] <- result - + PMlist <- patternMarkerResults$PatternMarkers + d <- lapply( + patternNames, FUN = function(p) { + result <- fgsea::fora( + pathways = gene.sets, genes = PMlist[[p]], + universe = features, + maxSize=2038) + result[["k/K"]] <- result$overlap / result$size + result$neg.log.padj <- (-10) * log10(result$padj) + result$gene.set <- names(gene.sets) + result <- dplyr::mutate(result,gene.set=forcats::fct_reorder(gene.set, - padj)) + return(result) + } + ) } - return(d) + return(d) }) -#' @rdname plotPatternHallmarks-methods +#' @rdname plotPatternGeneSet-methods #' @importFrom dplyr relocate -#' @importFrom ggplot2 ggplot aes_string geom_col ylab coord_flip theme_minimal ggtitle geom_hline geom_text theme aes ylim +#' @importFrom ggplot2 ggplot geom_col ylab coord_flip theme_minimal ggtitle geom_hline geom_text theme aes ylim #' @importFrom graphics plot legend lines points -#' @aliases plotPatternHallmarks -setMethod("plotPatternHallmarks", signature(object="CogapsResult", patternhallmarks = "list", whichpattern="numeric"), -function(object, patternhallmarks, whichpattern=1) +#' @aliases plotPatternGeneSet +setMethod("plotPatternGeneSet", signature(patterngeneset = "list", whichpattern="numeric", padj_threshold = "numeric"), +function(patterngeneset, whichpattern=1, padj_threshold = 0.05) { # should be able to pass in info about just one pattern, or to pass all info and specify which pattern - if (length(patternhallmarks) > 1){ - patternhallmarks <- patternhallmarks[[whichpattern]] + if (length(patterngeneset) > 1){ + gs_df <- patterngeneset[[whichpattern]] + } else { + gs_df <- patterngeneset[1] + } + # check the test type conducted to inform the title of the resulting plot + if("k/K" %in% colnames(gs_df)) { + method_name <- "Overrepresented" + } else { + method_name <- "Enriched" } - - # rearrange columns to move overlap genes to the end and convert to vector for - # compatibility with saving as csv - patternhallmarks <- relocate(patternhallmarks, "overlapGenes", .after = "MsigDB_Hallmark") - patternhallmarks <- relocate(patternhallmarks, "neg.log.q", .after = "padj") - patternhallmarks <- patternhallmarks %>% mutate(overlapGenes = sapply(overlapGenes, toString)) # for plotting, limit the results to significant over-representation - patternhallmarks <- patternhallmarks[patternhallmarks$pval < 0.05,] + gs_df <- gs_df[gs_df$padj < padj_threshold,] + neg.log.hline <- -10*log10(0.05) + + axis_max <- ceiling(max(gs_df$neg.log.padj)) + (max(gs_df$neg.log.padj)/4) #plot and save the waterfall plot of ORA p-values - plot <- ggplot(patternhallmarks, aes_string(y = "neg.log.q", x = "MsigDB_Hallmark", fill = "MsigDB_Hallmark")) + - ## Specifies barplot + plot <- ggplot(gs_df, aes(y = neg.log.padj, x = gene.set, fill = gene.set)) + geom_col() + - ## Rename y axis - ylab("-10*log10(FDR q-value)") + - ## Flips the coordinates + ylab("-10*log10(FDR-adjusted p-value)") + coord_flip() + - ## Makes the background white theme_minimal() + - ## Add title - ggtitle(paste0("Overrepresented MsigDB Hallmarks in Pattern", whichpattern)) + - ## This creates the dotted line at .05 value - geom_hline(yintercept=c(13.0103), linetype="dotted") + # Add veritcle line to show significances - ## Adds the q values + ggtitle(paste0(method_name, " gene sets in Pattern_", whichpattern)) + geom_text(aes(label=format(signif(padj, 4))), hjust = -.04) + - ## Removes legend theme(legend.position = "none") + - ## specifies limits - ylim(0, ceiling(max(patternhallmarks$"neg.log.q")) + (max(patternhallmarks$"neg.log.q")/4)) + ylim(0, axis_max) + + if(neg.log.hline <= axis_max) { + plot <- plot + + geom_hline(yintercept=neg.log.hline, linetype="dotted") + } return(plot) }) diff --git a/man/getPatternGeneSet-methods.Rd b/man/getPatternGeneSet-methods.Rd new file mode 100644 index 00000000..9c94db40 --- /dev/null +++ b/man/getPatternGeneSet-methods.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R +\docType{methods} +\name{getPatternGeneSet} +\alias{getPatternGeneSet} +\alias{getPatternGeneSet,CogapsResult,list,character-method} +\title{generate statistics associating patterns with gene sets} +\usage{ +getPatternGeneSet( + object, + gene.sets, + method = c("enrichment", "overrepresentation"), + ... +) + +\S4method{getPatternGeneSet}{CogapsResult,list,character}( + object, + gene.sets, + method = c("enrichment", "overrepresentation"), + ... +) +} +\arguments{ +\item{object}{an object of type CogapsResult} + +\item{gene.sets}{a list of gene sets to test. List names should be the names of the gene sets} + +\item{method}{enrichment or overrepresentation. Conducts a test for gene set enrichment using {fgsea::gsea} ranking features by pattern amplitude or a test for gene set overrepresentation in pattern markers using {fgsea::fora}, respectively.} + +\item{...}{additional parameters passed to {patternMarkers} if using overrepresentation method} +} +\value{ +list of dataframes containing gene set enrichment or gene set overrepresentation statistics +} +\description{ +generate statistics associating patterns with gene sets +} +\examples{ +data(GIST) +gs.test <- list( +"gs1" = c("Hs.2", "Hs.4", "Hs.36", "Hs.96", "Hs.202"), +"gs2" = c("Hs.699463", "Hs.699288", "Hs.699280", "Hs.699154", "Hs.697294") +) +getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "enrichment") +getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "overrepresentation") +} diff --git a/man/getPatternHallmarks-methods.Rd b/man/getPatternHallmarks-methods.Rd deleted file mode 100644 index af633581..00000000 --- a/man/getPatternHallmarks-methods.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R -\docType{methods} -\name{getPatternHallmarks} -\alias{getPatternHallmarks} -\alias{getPatternHallmarks,CogapsResult-method} -\title{generate statistics associating patterns with MSigDB hallmark gene sets} -\usage{ -getPatternHallmarks(object) - -\S4method{getPatternHallmarks}{CogapsResult}(object) -} -\arguments{ -\item{object}{an object of type CogapsResult} -} -\value{ -dataframe of hallmark info -} -\description{ -generate statistics associating patterns with MSigDB hallmark gene sets -} diff --git a/man/plotPatternGeneSet-methods.Rd b/man/plotPatternGeneSet-methods.Rd new file mode 100644 index 00000000..8205c866 --- /dev/null +++ b/man/plotPatternGeneSet-methods.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R +\docType{methods} +\name{plotPatternGeneSet} +\alias{plotPatternGeneSet} +\alias{plotPatternGeneSet,list,numeric,numeric-method} +\title{generate a barchart of most significant hallmark sets for a pattern} +\usage{ +plotPatternGeneSet(patterngeneset, whichpattern = 1, padj_threshold = 0.05) + +\S4method{plotPatternGeneSet}{list,numeric,numeric}(patterngeneset, whichpattern = 1, padj_threshold = 0.05) +} +\arguments{ +\item{patterngeneset}{output from getPatternGeneSet} + +\item{whichpattern}{which pattern to generate bar chart for} + +\item{padj_threshold}{maximum adjusted p-value of gene sets rendered on the resulting plot} +} +\value{ +image object of barchart +} +\description{ +generate a barchart of most significant hallmark sets for a pattern +} diff --git a/man/plotPatternHallmarks-methods.Rd b/man/plotPatternHallmarks-methods.Rd deleted file mode 100644 index 242d375c..00000000 --- a/man/plotPatternHallmarks-methods.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R -\docType{methods} -\name{plotPatternHallmarks} -\alias{plotPatternHallmarks} -\alias{plotPatternHallmarks,CogapsResult,list,numeric-method} -\title{generate a barchart of most significant hallmark sets for a pattern} -\usage{ -plotPatternHallmarks(object, patternhallmarks, whichpattern = 1) - -\S4method{plotPatternHallmarks}{CogapsResult,list,numeric}(object, patternhallmarks, whichpattern = 1) -} -\arguments{ -\item{object}{an object of type CogapsResult} - -\item{patternhallmarks}{output from getPatternHallmarks} - -\item{whichpattern}{which pattern to generate bar chart for} -} -\value{ -image object of barchart -} -\description{ -generate a barchart of most significant hallmark sets for a pattern -} diff --git a/tests/testthat/test_getPatternGeneSet.R b/tests/testthat/test_getPatternGeneSet.R new file mode 100644 index 00000000..ab533b9d --- /dev/null +++ b/tests/testthat/test_getPatternGeneSet.R @@ -0,0 +1,47 @@ +test_that("getPatternGeneSet works on enrichment test", { + #set up + data(GIST) + gs.test <- list( + "gs1" = c("Hs.2", "Hs.4", "Hs.36", "Hs.96", "Hs.202"), + "gs2" = c("Hs.699463", "Hs.699288", "Hs.699280", "Hs.699154", "Hs.697294") + ) + expect_no_error(getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "enrichment")) +}) + +test_that("getPatternGeneSet works on overrepresentation test", { + #set up + data(GIST) + gs.test <- list( + "gs1" = c("Hs.2", "Hs.4", "Hs.36", "Hs.96", "Hs.202"), + "gs2" = c("Hs.699463", "Hs.699288", "Hs.699280", "Hs.699154", "Hs.697294") + ) + expect_no_error(getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "overrepresentation")) +}) + +test_that("plotPatternGeneSet renders a plot for enrichment test", { + #set up + data(GIST) + gs.test <- list( + "gs1" = c("Hs.2", "Hs.4", "Hs.36", "Hs.96", "Hs.202"), + "gs2" = c("Hs.699463", "Hs.699288", "Hs.699280", "Hs.699154", "Hs.697294") + ) + gpgs_res <- getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "enrichment") + pl <- plotPatternGeneSet(patterngeneset = gpgs_res, whichpattern = 1, padj_threshold = 1) + + expect_is(pl$layers[[1]], "ggproto") + expect_match(pl$labels$title, "Enriched gene sets in Pattern_1") +}) + +test_that("plotPatternGeneSet renders a plot for overrepresentation test", { + #set up + data(GIST) + gs.test <- list( + "gs1" = c("Hs.2", "Hs.4", "Hs.36", "Hs.96", "Hs.202"), + "gs2" = c("Hs.699463", "Hs.699288", "Hs.699280", "Hs.699154", "Hs.697294") + ) + gpgs_res <- getPatternGeneSet(object = GIST.result, gene.sets = gs.test, method = "overrepresentation", threshold = "cut") + pl <- plotPatternGeneSet(patterngeneset = gpgs_res, whichpattern = 1, padj_threshold = 1) + + expect_is(pl$layers[[1]], "ggproto") + expect_match(pl$labels$title, "Overrepresented gene sets in Pattern_1") +}) diff --git a/vignettes/CoGAPS.Rmd b/vignettes/CoGAPS.Rmd index 6e51a3fb..67edc644 100755 --- a/vignettes/CoGAPS.Rmd +++ b/vignettes/CoGAPS.Rmd @@ -89,7 +89,7 @@ simulated toy data for a test run. Single-cell data will be loaded later in this file. ```{r modsim} -load('../data/modsimdata.rda') +data('modsimdata') # input to CoGAPS modsimdata ``` @@ -220,7 +220,7 @@ cogapsresult@sampleFactors cogapsresult@featureLoadings # check reference result: -load('../data/modsimresult.rda') +data('modsimresult') ``` If both matrices--sampleFactors and featureLoadings--have reasonable @@ -420,12 +420,24 @@ pm <- patternMarkers(cogapsresult, threshold="cut") ``` # Pattern GSEA -One way to explore and use CoGAPS patterns is to conduct gene set enrichment analysis by functionally annotating the genes which are significant for each pattern. The getPatternHallmarks function provides a wrapper around the fgsea fora method and associates each pattern with msigDB hallmark pathway annotations using the list of marker genes attained from the patternMarkers statistic. +One way to explore and use CoGAPS patterns is to conduct gene set enrichment analysis by functionally annotating the genes which are significant for each pattern. The getPatternGeneSet function provides a wrapper around the gsea and fora methods from fgsea for gene set enrichment in CoGAPS pattern amplitudes or gene set overrepresentation in pattern markers, respectively. Gene sets for testing are provided as a list to allow testing of gene sets from many sources, such as hallmark gene sets from [msigDB](https://www.gsea-msigdb.org/gsea/msigdb/) or gene ontology sets from [org.Hs.eg.db](https://bioconductor.org/packages/release/data/annotation/html/org.Hs.eg.db.html). -To perform gene set analysis on pattern markers, please run: +To perform gene set overrepresentation on pattern markers, please run: ```{r get hallmarks} -hallmarks <- getPatternHallmarks(cogapsresult) +hallmark_ls <- list( + "HALLMARK_ALLOGRAFT_REJECTION" = c( + "AARS1","ABCE1","ABI1","ACHE","ACVR2A","AKT1","APBB1","B2M","BCAT1","BCL10","BCL3","BRCA1","C2","CAPG","CARTPT","CCL11","CCL13","CCL19","CCL2","CCL22","CCL4","CCL5","CCL7","CCND2","CCND3","CCR1","CCR2","CCR5","CD1D","CD2","CD247","CD28","CD3D","CD3E","CD3G","CD4","CD40","CD40LG","CD47","CD7","CD74","CD79A","CD80","CD86","CD8A","CD8B","CD96","CDKN2A","CFP","CRTAM","CSF1","CSK","CTSS","CXCL13","CXCL9","CXCR3","DARS1","DEGS1","DYRK3","EGFR","EIF3A","EIF3D","EIF3J","EIF4G3","EIF5A","ELANE","ELF4","EREG","ETS1","F2","F2R","FAS","FASLG","FCGR2B","FGR","FLNA","FYB1","GALNT1","GBP2","GCNT1","GLMN","GPR65","GZMA","GZMB","HCLS1","HDAC9","HIF1A","HLA-A","HLA-DMA","HLA-DMB","HLA-DOA","HLA-DOB","HLA-DQA1","HLA-DRA","HLA-E","HLA-G","ICAM1","ICOSLG","IFNAR2","IFNG","IFNGR1","IFNGR2","IGSF6","IKBKB","IL10","IL11","IL12A","IL12B","IL12RB1","IL13","IL15","IL16","IL18","IL18RAP","IL1B","IL2","IL27RA","IL2RA","IL2RB","IL2RG","IL4","IL4R","IL6","IL7","IL9","INHBA","INHBB","IRF4","IRF7","IRF8","ITGAL","ITGB2","ITK","JAK2","KLRD1","KRT1","LCK","LCP2","LIF","LTB","LY75","LY86","LYN","MAP3K7","MAP4K1","MBL2","MMP9","MRPL3","MTIF2","NCF4","NCK1","NCR1","NLRP3","NME1","NOS2","NPM1","PF4","PRF1","PRKCB","PRKCG","PSMB10","PTPN6","PTPRC","RARS1","RIPK2","RPL39","RPL3L","RPL9","RPS19","RPS3A","RPS9","SIT1","SOCS1","SOCS5","SPI1","SRGN","ST8SIA4","STAB1","STAT1","STAT4","TAP1","TAP2","TAPBP","TGFB1","TGFB2","THY1","TIMP1","TLR1","TLR2","TLR3","TLR6","TNF","TPD52","TRAF2","TRAT1","UBE2D1","UBE2N","WARS1","WAS","ZAP70" + ), + "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION" = c( + "ABI3BP","ACTA2","ADAM12","ANPEP","APLP1","AREG","BASP1","BDNF","BGN","BMP1","CADM1","CALD1","CALU","CAP2","CAPG","CD44","CD59","CDH11","CDH2","CDH6","COL11A1","COL12A1","COL16A1","COL1A1","COL1A2","COL3A1","COL4A1","COL4A2","COL5A1","COL5A2","COL5A3","COL6A2","COL6A3","COL7A1","COL8A2","COMP","COPA","CRLF1","CCN2","CTHRC1","CXCL1","CXCL12","CXCL6","CCN1","DAB2","DCN","DKK1","DPYSL3","DST","ECM1","ECM2","EDIL3","EFEMP2","ELN","EMP3","ENO2","FAP","FAS","FBLN1","FBLN2","FBLN5","FBN1","FBN2","FERMT2","FGF2","FLNA","FMOD","FN1","FOXC2","FSTL1","FSTL3","FUCA1","FZD8","GADD45A","GADD45B","GAS1","GEM","GJA1","GLIPR1","COLGALT1","GPC1","GPX7","GREM1","HTRA1","ID2","IGFBP2","IGFBP3","IGFBP4","IL15","IL32","IL6","CXCL8","INHBA","ITGA2","ITGA5","ITGAV","ITGB1","ITGB3","ITGB5","JUN","LAMA1","LAMA2","LAMA3","LAMC1","LAMC2","P3H1","LGALS1","LOX","LOXL1","LOXL2","LRP1","LRRC15","LUM","MAGEE1","MATN2","MATN3","MCM7","MEST","MFAP5","MGP","MMP1","MMP14","MMP2","MMP3","MSX1","MXRA5","MYL9","MYLK","NID2","NNMT","NOTCH2","NT5E","NTM","OXTR","PCOLCE","PCOLCE2","PDGFRB","PDLIM4","PFN2","PLAUR","PLOD1","PLOD2","PLOD3","PMEPA1","PMP22","POSTN","PPIB","PRRX1","PRSS2","PTHLH","PTX3","PVR","QSOX1","RGS4","RHOB","SAT1","SCG2","SDC1","SDC4","SERPINE1","SERPINE2","SERPINH1","SFRP1","SFRP4","SGCB","SGCD","SGCG","SLC6A8","SLIT2","SLIT3","SNAI2","SNTB1","SPARC","SPOCK1","SPP1","TAGLN","TFPI2","TGFB1","TGFBI","TGFBR3","TGM2","THBS1","THBS2","THY1","TIMP1","TIMP3","TNC","TNFAIP3","TNFRSF11B","TNFRSF12A","TPM1","TPM2","TPM4","VCAM1","VCAN","VEGFA","VEGFC","VIM","WIPF1","WNT5A" + ) +) + +hallmarks_ora <- getPatternGeneSet( + cogapsresult, gene.sets = hallmark_ls, method = "overrepresentation", + threshold = "cut", lp = NA, axis = 1 # patternMarker parameters to match PDAC atlas result +) ``` hallmarks is a list of data frames, each containing hallmark overrepresentation statistics corresponding to one pattern. @@ -433,7 +445,9 @@ hallmarks is a list of data frames, each containing hallmark overrepresentation To generate a barchart of the most significant hallmarks for any given pattern, please run: ```{r plot hallmarks} -pl_pattern7 <- plotPatternHallmarks(cogapsresult, hallmarks, whichpattern = 7) +pl_pattern7 <- plotPatternGeneSet( + patterngeneset = hallmarks_ora, whichpattern = 7, padj_threshold = 0.05 +) pl_pattern7 ``` To generate statistics on the association between certain sample groups and patterns, we provide a wrapper function, called MANOVA.This will allow us to explore if the patterns we have discovered lend to statistically significant differences in the sample groups. We will first load in the original data (if not already done earlier).