diff --git a/server.R b/server.R new file mode 100644 index 0000000..e76df13 --- /dev/null +++ b/server.R @@ -0,0 +1,292 @@ +library(shiny) +source("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/src/geneplots.R") + +## functions +Scores2Percentiles <- function(scores) { + stopifnot(is.numeric(scores)) + return((rank(scores) / length(scores)) * 100) +} + +# for domains +dmn.data <- ReadSubrgnData("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains_table.txt") +dmn.data$perc <- Scores2Percentiles(dmn.data$subRVIS) +dmn.bed <- ReadSubrgnBed("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains.bed") +dmn.map <- ReadDomainMap("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/cdd_table.txt") +dmn.pvals <- ReadGenePvals("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains_gene_pvals.txt") +dmn.sds <- ReadGeneSds("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/domains_gene_sds.txt") + +# for exons +exn.data <- ReadSubrgnData("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons_table.txt") +exn.data$perc <- Scores2Percentiles(exn.data$subRVIS) +exn.bed <- ReadSubrgnBed("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons.bed") +exn.map <- NULL +exn.pvals <- ReadGenePvals("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons_gene_pvals.txt") +exn.sds <- ReadGeneSds("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/exons_gene_sds.txt") + +# for all +strand.map <- ReadStrandData("C:/Users/m119979/Desktop/subrvis-master/variant_plotting/data/strand_data.txt") + +# Define server logic +shinyServer(function(input, output) { + + # Expression that generates the plot. The expression is + # wrapped in a call to renderPlot to indicate that: + # + # 1) It is "reactive" and therefore should re-execute automatically + # when inputs change + # 2) Its output type is a plot + + GetGene <- eventReactive(input$plot, { + input$gene + }) + + GetRgn <- eventReactive(input$plot, { + input$rgn + }) + + GetScoreType <- eventReactive(input$plot, { + input$score.type + }) + + + GetVariants <- eventReactive(input$plot, { + + if (is.null(input$variants) || input$variants == "") { + return(NULL) + } + + + variants <- tryCatch({ + read.table(textConnection(input$variants), header=F) + }, error=function(e) { + stop("Unrecognized format for variants.") + }) + + if (ncol(variants) != 2) { + stop("Variants should have two columns: chromosome and position.") + } + + + colnames(variants) <- c("chrom", "pos") + if (!is.numeric(variants$pos)){ + stop("Variant position should be numeric.") + } + + return(variants) + }) + + GetPlotData <- eventReactive(input$plot, { + rgn <- GetRgn() + score.type <- GetScoreType() + + if (rgn == "Domains") { + subrgn.data <- dmn.data + subrgn.bed <- dmn.bed + subrgn.map <- dmn.map + gene.pvals <- dmn.pvals + gene.sds <- dmn.sds + } else { + subrgn.data <- exn.data + subrgn.bed <- exn.bed + subrgn.map <- exn.map + gene.pvals <- exn.pvals + gene.sds <- exn.sds + } + + gene <- GetGene() + if (!(toupper(gene) %in% toupper(subrgn.data$gene))) { + err.msg <- paste("Gene", gene, "cannot be found.") + stop(err.msg) + } + + strand <- strand.map[toupper(rownames(strand.map)) == toupper(gene), ] + if((length(strand) != 1) || (!(strand %in% c("+", "-")))) { + err.msg <- paste("Gene", gene, "has no strand data.") + stop(err.msg) + } + + + variants <- GetVariants() + + gene.subrgns <- GetGeneSubrgn(subrgn.data, subrgn.bed, subrgn.map, gene, rgn, score.type) + gene.name <- unique(gene.subrgns$gene) + stopifnot(length(gene.name)==1) + + p.val <- gene.pvals[toupper(gene.name), ] + gene.sd <- gene.sds[toupper(gene.name), "percentile"] + gene.bed <- subrgn.bed[toupper(subrgn.bed$gene) == toupper(gene), ] + + return(list(gene.subrgns = gene.subrgns, + subrgn.bed = subrgn.bed, + gene.bed = gene.bed, + gene.name = gene.name, + p.val = p.val, + gene.sd = gene.sd, + variants = variants, + rgn = rgn, + score.type = score.type, + strand = strand)) + }) + + + output$genePlot <- renderPlot({ + + plot.data <- GetPlotData() + + gene.plot <- PlotGeneSkel(plot.data$gene.subrgns, plot.data$gene.name, plot.data$rgn, plot.data$score.type) + + gene.plot <- PlotConnectors(gene.plot, plot.data$gene.subrgns, plot.data$strand) + if (plot.data$strand == "-") { + gene.plot <- gene.plot + scale_x_reverse(label=function(x) -(x-max(plot.data$gene.subrgns$end))) + } + + if (!is.null(plot.data$variants)) { + gene.plot <- AddVariants(gene.plot, plot.data$gene.subrgns, plot.data$subrgn.bed, plot.data$variants, plot.data$strand) + } + + ylab.txt <- ifelse(plot.data$score.type=="Percentiles", "subRVIS Percentile", "subRVIS Raw Score") + plot(gene.plot + labs(x=paste0("Gene Position\n", + "subRVIS P-value for known pathogenic mutations: ", + round(plot.data$p.val, digits=3), + "\nsubRVIS SDP: ", + round(plot.data$gene.sd, digits=1), + "%"), + y=ylab.txt)) + }) + + output$var.table = renderDataTable({ + plot.data <- GetPlotData() + + plot.table <- plot.data$gene.subrgns + plot.table$var.count <- rep(0, nrow(plot.table)) + + gene.chrom <- unique(plot.data$gene.bed$chrom) + stopifnot(length(gene.chrom) == 1) + + variants <- plot.data$variants + variants <- variants[variants$chrom == gene.chrom, ] + + positions <- variants$pos - 1 # convert to 0-based BED coords + + for (pos in positions) { + subrgn.name <- plot.data$gene.bed[pos >= plot.data$gene.bed$start & pos < plot.data$gene.bed$end, "subrgn"] + stopifnot(length(subrgn.name) <= 1) + + if(length(subrgn.name) == 1) { + plot.table[subrgn.name, "var.count"] <- plot.table[subrgn.name, "var.count"] + 1 + } + + } + + plot.table$name <- rownames(plot.table) + + cols.to.plot <- c("name", "start", "end", "size", "coverage_percent", "subRVIS", plot.data$rgn, "var.count") + + plot.table <- plot.table[, cols.to.plot] + plot.table$start <- plot.table$start + 1 + + if((plot.data$strand == "-") && (nrow(plot.table) != 1)) { + # Need to reverse coordinates + plot.table <- plot.table[order(-plot.table$start), ] + plot.table$start[1] <- 1 + plot.table$end[1] <- plot.table$size[1] + for (i in 2:nrow(plot.table)) { + plot.table[i, "start"] <- plot.table$end[i-1] + 1 + plot.table$end[i] <- plot.table$size[i] + plot.table$start[i] - 1 + } + } + + colnames(plot.table) <- c("name", "start", "end", "size", + "percent_covered", "score", + "sub-region", "variant_count") + + return(plot.table) + }) + + output$downloadData <- downloadHandler( + filename = function() { + paste('subRVIS table','.csv', sep='') + }, + content = function(file){ + plot.data <- GetPlotData() + + plot.table <- plot.data$gene.subrgns + plot.table$var.count <- rep(0, nrow(plot.table)) + + gene.chrom <- unique(plot.data$gene.bed$chrom) + stopifnot(length(gene.chrom) == 1) + + variants <- plot.data$variants + variants <- variants[variants$chrom == gene.chrom, ] + + positions <- variants$pos - 1 # convert to 0-based BED coords + + for (pos in positions) { + subrgn.name <- plot.data$gene.bed[pos >= plot.data$gene.bed$start & pos < plot.data$gene.bed$end, "subrgn"] + stopifnot(length(subrgn.name) <= 1) + + if(length(subrgn.name) == 1) { + plot.table[subrgn.name, "var.count"] <- plot.table[subrgn.name, "var.count"] + 1 + } + + } + + plot.table$name <- rownames(plot.table) + + cols.to.plot <- c("name", "start", "end", "size", "coverage_percent", "subRVIS", plot.data$rgn, "var.count") + + plot.table <- plot.table[, cols.to.plot] + plot.table$start <- plot.table$start + 1 + + if((plot.data$strand == "-") && (nrow(plot.table) != 1)) { + # Need to reverse coordinates + plot.table <- plot.table[order(-plot.table$start), ] + plot.table$start[1] <- 1 + plot.table$end[1] <- plot.table$size[1] + for (i in 2:nrow(plot.table)) { + plot.table[i, "start"] <- plot.table$end[i-1] + 1 + plot.table$end[i] <- plot.table$size[i] + plot.table$start[i] - 1 + } + } + + colnames(plot.table) <- c("name", "start", "end", "size", + "percent_covered", "score", + "sub-region", "variant_count") + + table_data <- plot.table + + write.csv(table_data, file) + }) + + + output$downloadPlot <- downloadHandler( + filename = function() { + paste("subRVIS plot", ".svg", sep='') + }, + content = function(file) { + plot.data <- GetPlotData() + + gene.plot <- PlotGeneSkel(plot.data$gene.subrgns, plot.data$gene.name, plot.data$rgn, plot.data$score.type) + + gene.plot <- PlotConnectors(gene.plot, plot.data$gene.subrgns, plot.data$strand) + if (plot.data$strand == "-") { + gene.plot <- gene.plot + scale_x_reverse(label=function(x) -(x-max(plot.data$gene.subrgns$end))) + } + + if (!is.null(plot.data$variants)) { + gene.plot <- AddVariants(gene.plot, plot.data$gene.subrgns, plot.data$subrgn.bed, plot.data$variants, plot.data$strand) + } + + ylab.txt <- ifelse(plot.data$score.type=="Percentiles", "subRVIS Percentile", "subRVIS Raw Score") + + plot(gene.plot + labs(x=paste0("Gene Position\n", + "subRVIS P-value for known pathogenic mutations: ", + round(plot.data$p.val, digits=3), + "\nsubRVIS SDP: ", + round(plot.data$gene.sd, digits=1), + "%"), + y=ylab.txt)) + + ggsave(file, plot = last_plot (), dpi = 300, width = 12, height = 6, units = "in") + }) +}) \ No newline at end of file diff --git a/ui.R b/ui.R new file mode 100644 index 0000000..afd5e1e --- /dev/null +++ b/ui.R @@ -0,0 +1,102 @@ +library(shiny) + +# Define UI +shinyUI(fluidPage( + + # Google Analytics + tags$head(includeScript("google-analytics.js")), + + # Application title + titlePanel("Plot Variants Across subRVIS Regions"), + + # Sidebar + sidebarLayout( + sidebarPanel( + textInput("gene", label = h3("Gene Name"), + value = "MAPT"), + + tags$style(type="text/css", "textarea {width:100%}"), + tags$textarea(id = 'variants', + placeholder = 'Variants (build 37), e.g. chr1 10003', + rows = 8, ""), + radioButtons("rgn", NULL, + choices=c("Domains", "Exons"), + selected = NULL, + inline = T, + width = NULL), + radioButtons("score.type", NULL, + choices=c("Raw", "Percentiles"), + selected = NULL, + inline = T, + width = NULL), + actionButton("plot", label = "Plot"), + downloadButton('downloadData', 'Download Table'), + downloadButton('downloadPlot', 'Download Plot') + ), + + # Show a plot of the generated distribution + mainPanel( + tabsetPanel( + tabPanel('Plot', + plotOutput("genePlot")), + tabPanel('Table', + dataTableOutput("var.table")) + ) + )), + p("This online tool can be used to plot the domain or exon subRVIS scores of a given gene."), + p("To the right of the plot is a legend. For domains, the legend denotes each aligned + conserved domain, with the CDD ID in parentheses next to the domain name. For exons, + the legend denotes the exon number. Note that genes that are on the minus strand + are plotted in reverse, i.e. regardless of its strand the first base of the gene + is at position zero at the leftmost point of the plot."), + p("There is the option to plot either the raw subRVIS scores or the subRVIS percentiles. The raw subRVIS + scores are the scores obtained from directly applying the subRVIS methodology (see ", a("Gussow et al", href="http://www.genomebiology.com/2016/17/1/9", target="_blank"), "). + The subRVIS percentiles are a scaled version of the score, where the raw scores were converted to + percentiles across the genome. Though the raw scores offer superior resolution, the percentiles + are easier to interpret as they are put into the context of the rest of the sub regions + in the genome. Lower subRVIS scores and percentiles correspond to more intolerant regions. + We have found that 35% and lower is a reasonable threshold for the subRVIS percentiles - in other words, we are more inclined to believe + that a mutation is likely to cause disease when it falls in a sub region in the lower 35th percentile of + subRVIS scores. However, by no means does that mean that mutations falling in sub regions below this percentile + are necessarily pathogenic, nor does it mean that mutations falling in sub regions above this percentile + are necessarily benign."), + p("Underneath the plot are two subRVIS related measures, per gene, intended to aid in the + interpretation of the results. The first, labelled + \"subRVIS P-value for known pathogenic mutations\", + assesses the relationship between subRVIS scores and the distribution of known + pathogenic mutations (see ", a("Gussow et al", href="http://www.genomebiology.com/2016/17/1/9", target="_blank"), "). The subRVIS p-value for known pathogenic + mutations obviously is only reported for known disease causing genes."), + p("The second measure, labelled \"subRVIS SDP\", is intended as a general score + to help assess whether the location of a mutation in a gene is an important factor + in determining whether the mutation is pathogenic. Unlike the p-value for + pathogenic mutations, this score can be constructed for both genes already + implicated in diseases and for genes not currently implicated. + This score is the standard deviation of the gene's domain or exon raw subRVIS scores, converted to percentiles. + A higher SDP indicates a higher degree of variation in the intolerance scores across the + gene's regions. Though these scores can be useful in predicting whether we expect + pathogenic mutations to cluster in specific sub regions within a given gene, + currently there is not a relationship between these scores and whether known + pathogenic mutations actually do so."), + h3("About"), + p("SubRVIS is a gene sub region based score intended to help in the interpretation of human + sequence data. The sub regions are defined as the regions within the gene that aligned + to the Conserved Domain Database (http://www.ncbi.nlm.nih.gov/cdd/) and the unaligned regions + between each conserved domain alignment."), + p("In its current form, subRVIS is based upon allele frequency as represented + in whole exome sequence data from the NHLBI-ESP6500 data set. The score is designed to + rank the genic sub regions based on their intolerance to functional variation. A lower score + indicates a more intolerant sub region, while a higher score indicates a more tolerant sub region."), + p("The paper can be found ", a("here.", href="http://www.genomebiology.com/2016/17/1/9", target="_blank")), + h3("Terms of use"), + p("The content of the subRVIS site is intended strictly for educational + and research purposes. The data derived from this website may not be used + for any commercial purpose. The data from this website may not be + replicated on any other website without written consent."), + h3("Citation"), + p("Gussow AB, Petrovski S, Wang Q, Allen AS, Goldstein DB. + The intolerance to functional genetic variation of protein domains predicts the localization of pathogenic mutations within genes. + Genome Biology 2016, 17:9. doi:10.1186/s13059-016-0869-4."), + h3("Contact"), + p("For bug reports, please contact Ayal Gussow (abg2164@columbia.edu).") +)) +