v1.1.16

hartleys · Mar 1, 2016 · 1dc0b3f · 1dc0b3f
1 parent c7cd88b
commit 1dc0b3f
Show file tree

Hide file tree

Showing 819 changed files with 99,024 additions and 89,097 deletions.
diff --git a/JctSeqData_1.1.10.tar.gz → JctSeqData_1.1.16.tar.gz b/JctSeqData_1.1.10.tar.gz → JctSeqData_1.1.16.tar.gz
diff --git a/JunctionSeq-reference.pdf b/JunctionSeq-reference.pdf
diff --git a/JunctionSeq.pdf b/JunctionSeq.pdf
diff --git a/JunctionSeq/DESCRIPTION b/JunctionSeq/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: JunctionSeq
-Version: 1.1.10
+Version: 1.1.16
 Title: JunctionSeq: A Utility for Detection of Differential Exon and Splice-Junction Usage in RNA-Seq data
 Authors@R: c(person("Stephen", "Hartley", role = c("aut", "cre"),
            email = "JunctionSeq-contact@list.nih.gov", comment = "PhD"),

diff --git a/JunctionSeq/inst/NEWS → JunctionSeq/NEWS b/JunctionSeq/inst/NEWS → JunctionSeq/NEWS
@@ -1,3 +1,35 @@
+v1.1.14: 
+Minor bugfixes: (thanks to Warren McGee)
+  * When making html summary files with "use.multigene.aggregates" set to TRUE,
+    JunctionSeq will use shortened html file names in order to avoid filename length 
+    limitations on certain filesystems when there are numerous genes overlapping with
+    one another. Aggregate gene names will be truncated to only include the first and
+    last member genes iff there are more than 2 member genes, with a "..." in between.
+  * If minimalImageFilenames is FALSE, then Image files will similarly be renamed. Otherwise
+    they will be reduced in size even further, as per the v1.1.10 update. You can 
+    control the naming of the png files using the options: "minimalImageFilenames", 
+    "name.files.with.geneID", and "number.plots"
+  * Fixed a bug in the phenotype table on the HTML index page.
+
+Various minor "quality-of-life" improvements:
+  * When available, gene names will be included in all tables.
+  * The main index data table now includes mouseover text describing each column.
+  * Added additional information to each plot page.
+  * Added word wrapping to aggregate genes in the main index tables (thanks to Warren McGee).
+
+v1.1.12:
+  * Moved NEWS file to base directory
+  * Updated CITATION file with arxiv preprint.
+
+v1.1.10:
+  * Changed naming scheme for gene-profile plots, to reduce the length of the filenames. 
+  Some web hosts are not compatible with excessively long file names. When writeHTMLresults is TRUE,
+  JunctionSeq will now default to a shorter naming scheme in which the image files are named
+  by number rather than by gene. This behavior can be disabled by setting the
+  new minimalImageFilenames parameter to FALSE.
+  * Minor adjustments to the vignette.
+  * JunctionSeq is now built and tested on SL6 in R 3.2.3
+
 v1.1.3:
   * Very minor typographical fixes and clarifications in the vignette.
 

diff --git a/JunctionSeq/R/00.minor.utils.R b/JunctionSeq/R/00.minor.utils.R
@@ -42,6 +42,27 @@ simpleDeparse <- function(d){
   paste0(deparse(d),collapse="")
 }
 
+# Shorten aggregate gene identifiers so they can be used in file names.
+#
+# An aggregate identifier is a set of member names joined by "+". When an
+# identifier has more than two members, only the first and last are kept,
+# with "..." in between (e.g. "A+B+C+D" becomes "A+...+D"). Identifiers with
+# one or two members are returned unchanged.
+#
+# Args:
+#   g: vector of gene identifiers; coerced to character, so factors are
+#      accepted as well.
+#
+# Returns: for a single identifier, an unnamed character string; otherwise a
+#   character vector with one element per identifier, named by the original
+#   identifiers.
+truncateAggregateGene <- function(g){
+  # strsplit() rejects factors, so work on the character form throughout.
+  g <- as.character(g);
+
+  truncAG <- function(x){
+    if(! grepl("+",x,fixed=TRUE)){
+      x;
+    } else {
+      cells <- strsplit(x,"+",fixed=TRUE)[[1]];
+      if(length(cells) > 2){
+        paste0(cells[1],"+...+",cells[length(cells)]);
+      } else {
+        x;
+      }
+    }
+  }
+
+  if(length(g) == 1){
+    return(truncAG(g));
+  } else {
+    # vapply() rather than sapply(): the result stays a character vector even
+    # when g is empty, where sapply() would return an empty list.
+    return(vapply(g, truncAG, character(1)));
+  }
+}
+
 simpleReportMem <- function(){
      if(requireNamespace("pryr", quietly=TRUE)){
        message("Mem used:")

diff --git a/JunctionSeq/R/03.AllClasses.R b/JunctionSeq/R/03.AllClasses.R
@@ -29,7 +29,7 @@ setClass( "JunctionSeqCountSet",
       countVectors = "matrix",
       altSizeFactors = "data.frame",
       plottingEstimates = "list",
-      plottingEstimatesVST = "list", #DEPRECIATED! VST-xform is fast enough that it's better to calculate them as needed.
+      plottingEstimatesVST = "list", #DEPRECATED! VST-xform is fast enough that it's better to calculate them as needed.
       geneLevelPlottingEstimates = "list",
       modelFitForHypothesisTest = "list", #USUALLY unused.
       modelFitForEffectSize = "list", #USUALLY unused.

diff --git a/JunctionSeq/R/file.output.R b/JunctionSeq/R/file.output.R
diff --git a/JunctionSeq/R/func.R b/JunctionSeq/R/func.R
@@ -226,7 +226,7 @@ runJunctionSeqAnalyses <- function(sample.files, sample.names, condition,
 
 
 writeCompleteResults <- function(jscs, outfile.prefix, 
-                            gzip.output = TRUE, FDR.threshold = 0.05,
+                            gzip.output = TRUE, FDR.threshold = 0.01,
                             save.allGenes = TRUE, save.sigGenes = TRUE, save.fit = FALSE, save.VST = FALSE,
                             save.bedTracks = TRUE,
                             save.jscs = FALSE,
@@ -739,27 +739,55 @@ readJunctionSeqCounts <- function(countfiles = NULL, countdata = NULL,
 ##############################################################################################################################################################################################################################
 
 mapGeneNames <- function(jscs, gene.names = NULL, gene.name.separator = "+", gene.multimap.separator = ","){
+  # Attach gene names (and any extra annotation columns) to
+  # jscs@flatGffGeneData, keyed on its geneID column.
+  #
+  # Args:
+  #   jscs: object whose flatGffGeneData slot has a geneID column.
+  #   gene.names: NULL, the name of a tab-delimited text file with a header
+  #     row, or a data.frame. Column 1 is taken to be the gene ID and column 2
+  #     the gene name; each further column is mapped onto flatGffGeneData
+  #     under its own column name.
+  #   gene.name.separator, gene.multimap.separator: passed through unchanged
+  #     to mapGeneNamesToList().
+  #
+  # Returns: jscs with flatGffGeneData$gene_name (plus one column per extra
+  #   gene.names column) filled in.
+
+  # No mapping supplied: the gene IDs double as the gene names.
+  if(is.null(gene.names)){
+    jscs@flatGffGeneData$gene_name <- jscs@flatGffGeneData$geneID;
+    return(jscs)
+  }
+  # A single string is treated as the path of a tab-delimited file.
+  if(is.character(gene.names) && length(gene.names) == 1){
+    gene.names <- read.table(gene.names, sep='\t',header=TRUE,stringsAsFactors=FALSE);
+  }
+  # is.data.frame() instead of comparing class() to a string: class() has
+  # length > 1 for data.frame subclasses, which makes if() fail.
+  if(! is.data.frame(gene.names)){
+    stop("Error: gene.names must be a filename or a data frame!")
+  }
+  # Drop any subclass so the strict class check in mapGeneNamesToList() passes.
+  gene.names <- as.data.frame(gene.names, stringsAsFactors = FALSE);
+  if(ncol(gene.names) < 2){
+    stop("Error: gene.names must have at least two columns (geneID, then gene_name)!")
+  }
+  if(names(gene.names)[1] != "geneID"){
+    message("       (Assuming \"",names(gene.names)[1],"\" column is geneID)");
+  }
+  if(names(gene.names)[2] != "gene_name"){
+    message("       (Assuming \"",names(gene.names)[2],"\" column is gene_name)");
+  }
+
   jscs@flatGffGeneData$gene_name <- mapGeneNamesToList(jscs@flatGffGeneData$geneID, 
                                                        gene.names = gene.names,
                                                        gene.name.separator = gene.name.separator,
                                                        gene.multimap.separator = gene.multimap.separator)
+
+  # Every column beyond the second is an extra annotation. Gene IDs are looked
+  # up in column 1 (oldID.column) and the values are read from column i
+  # (newID.column), mirroring the default 1 -> 2 mapping used for gene_name.
+  if(ncol(gene.names) > 2){
+    for(i in 3:ncol(gene.names)){
+       jscs@flatGffGeneData[[names(gene.names)[i]]] <- mapGeneNamesToList(jscs@flatGffGeneData$geneID, 
+                                                       gene.names = gene.names,
+                                                       gene.name.separator = gene.name.separator,
+                                                       gene.multimap.separator = gene.multimap.separator,
+                                                       oldID.column = 1, newID.column = i);
+    }
+  }
+
   return(jscs)
 }
 
-mapGeneNamesToList <- function(geneIDs, gene.names = NULL, gene.name.separator = "+", gene.multimap.separator = ","){
+mapGeneNamesToList <- function(geneIDs, gene.names = NULL, gene.name.separator = "+", gene.multimap.separator = ",", oldID.column = 1, newID.column = 2){
   if(is.null(gene.names)){
     out = geneIDs
     names(out) = geneIDs
     return(out)
   }
   if(class(gene.names) != "data.frame"){
-    stop("Error: gene.names must be a data frame!")
+    stop("Error: gene.names must be a filename or a data frame!")
   }
   oldIDs <- as.character(geneIDs)
-  oldIDs.map <- as.character(gene.names[,1])
-  newIDs <- as.character(gene.names[,2])
+  oldIDs.map <- as.character(gene.names[,oldID.column])
+  newIDs <- as.character(gene.names[,newID.column])
 
-  oldID.list <- strsplit(geneIDs, "+", fixed=TRUE)
+  oldID.list <- strsplit(oldIDs, "+", fixed=TRUE)
 
   out <- sapply(oldID.list, function(gs){
      gs <- as.character(gs)

diff --git a/JunctionSeq/R/main.plotGenes.R b/JunctionSeq/R/main.plotGenes.R
@@ -128,6 +128,8 @@ buildAllPlots <- function(jscs,
       if(verbose) message("> buildAllPlots: Too many genes found. Only plotting the first ",max.gene.ct, " genes.")
     }
   }
+  #shortNameGeneList <- truncateAggregateGene(gene.list);
+
 
     if(verbose) message("> buildAllPlots: Starting plotting...")
 
@@ -204,6 +206,7 @@ buildAllPlots <- function(jscs,
                             number.plots = geneNum.strings,
                             css.file = html.cssFile, css.link = html.cssLink,
                             minimalImageFilenames = minimalImageFilenames,
+                            name.files.with.geneID = name.files.with.geneID,
                             compare.analysis.list = html.compare.results.list, verbose = verbose, debug.mode = debug.mode,
                             INTERNAL.VARS = INTERNAL.VARS)
      if(verbose) message("> buildAllPlots: Finished writing HTML results index.")
@@ -413,9 +416,9 @@ buildAllPlotsForGene <- function(geneID,jscs,
     geneName <- if(minimalImageFilenames){
       ""
     } else if(name.files.with.geneID){
-      paste0(geneID,"-");
+      paste0(truncateAggregateGene(geneID),"-");
     } else {
-      paste0(jscs@flatGffGeneData$gene_name[jscs@flatGffGeneData$geneID == geneID],"-");
+      paste0(truncateAggregateGene(jscs@flatGffGeneData$gene_name[jscs@flatGffGeneData$geneID == geneID]),"-");
     }
 
       if(is.null(plot.exon.results)){

diff --git a/JunctionSeq/inst/CITATION b/JunctionSeq/inst/CITATION
@@ -1,17 +1,28 @@
-citHeader("To cite package SpliceSeq in publications use:")
+
+#citHeader("To cite package JunctionSeq in publications use:")
 
 year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE)
 vers <- paste("R package version", meta$Version)
 
-citEntry(entry = "Manual",
-         title = "JunctionSeq: Utility for Detection of Differential Splice-Junction Usage",
-         author = personList(as.person("Stephen Hartley")),
-         year = year,
-         note = vers,
+bibentry(bibtype="Article",  # citation entry for the arXiv preprint
+         title = "Detection and Visualization of Differential Splicing in RNA-Seq Data with JunctionSeq",
+         author = c( person("Stephen","Hartley","William"),  # positional args: given, family, middle
+                     person("James","Mullikin","C")),
+         year = 2015,  # fixed literal; does not use the `year` variable computed above
+         journal = "arXiv preprint arXiv:1512.06038",
+         url = "http://arxiv.org/abs/1512.06038",
+         textVersion = 
+         paste("Hartley SW, Mullikin JC. (2015) Detection and Visualization of Differential Splicing in RNA-Seq Data with JunctionSeq. arXiv preprint arXiv:1512.06038" ) )
 
-         textVersion =
-         paste0("Stephen Hartley,",
-                "(",
-                year,
-                "). JunctionSeq: Utility for Detection of Differential Splice-Junction Usage. ",
-                vers, "."))
+#bibentry(bibtype = "Manual",
+#         title = "JunctionSeq Package User Manual",
+#         author = c(person("Stephen", "Hartley", middle = "William")),
+#         year = year,
+#         note = vers,
+#         url = "http://hartleys.github.io/JunctionSeq/doc/JunctionSeq.pdf",
+#         textVersion =
+#         paste0("Stephen Hartley,",
+#                "(",
+#                year,
+#                "). JunctionSeq Package User Manual ",
+#                vers, "."))
diff --git a/JunctionSeq/inst/extdata/styles.css b/JunctionSeq/inst/extdata/styles.css
@@ -37,6 +37,14 @@ td {
         border-left: dotted 1px #ccc;
 	padding: 0.2em;
 }
+/* Table cell whose contents are allowed to wrap, including in the middle of
+   a long unbroken token. */
+td.wrapping {
+	vertical-align: top;
+	border-right: dotted 1px #ccc;
+	border-left: dotted 1px #ccc;
+	padding: 0.2em;
+	/* The keyword is "break-word"; "break-words" is invalid and is ignored. */
+	word-wrap:break-word;
+	white-space:pre-wrap;
+}
 th {
 	font-weight: bold;
 	text-align: left;

diff --git a/JunctionSeq/man/readJunctionSeqCounts.Rd b/JunctionSeq/man/readJunctionSeqCounts.Rd
@@ -99,7 +99,7 @@ readJunctionSeqCounts(countfiles, countdata,
   }
   \item{gene.names}{
     data.frame. This optional parameter can be used to decoder the gene id's used in the actual analysis into gene symbols or gene names for general readability.
-    This must be a data.frame with two columns of character strings. The first must be the gene ID's, and the second must be the gene names (as you wish them to appear in the plots). Genes
+    This must be a text file name or data.frame with two columns of character strings. The first must be the gene ID's, and the second must be the gene names (as you wish them to appear in the plots). Genes
     are allowed to have multiple gene names, in which case they will be separated by commas.
     The gene names will be used in the plots and figures.
   }

diff --git a/JunctionSeq/man/runJunctionSeqAnalyses.Rd b/JunctionSeq/man/runJunctionSeqAnalyses.Rd
@@ -127,7 +127,7 @@ runJunctionSeqAnalyses(sample.files, sample.names, condition,
 
   \item{gene.names}{
     data.frame. This optional parameter can be used to decoder the gene id's used in the actual analysis into gene symbols or gene names for general readability.
-    This must be a data.frame with two columns of character strings. The first must be the gene ID's, and the second must be the gene names (as you wish them to appear in the plots). Genes
+    This must be a text file name or data.frame with two columns of character strings. The first must be the gene ID's, and the second must be the gene names (as you wish them to appear in the plots). Genes
     are allowed to have multiple gene names, in which case they will be separated by commas.
     The gene names will be used in the plots and figures.
   }

diff --git a/JunctionSeq/man/writeCompleteResults.Rd b/JunctionSeq/man/writeCompleteResults.Rd
@@ -14,7 +14,7 @@
 \usage{
 writeCompleteResults(jscs, outfile.prefix, 
                         gzip.output = TRUE,
-                        FDR.threshold = 0.05,
+                        FDR.threshold = 0.01,
                         save.allGenes = TRUE, save.sigGenes = TRUE,
                         save.fit = FALSE, save.VST = FALSE,
                         save.bedTracks = TRUE,

diff --git a/JunctionSeq/vignettes/JunctionSeq.Rnw b/JunctionSeq/vignettes/JunctionSeq.Rnw
@@ -411,6 +411,7 @@ The advantage to our method is that reads (or read-pairs) are never counted more
     \item[\texttt{test.formula1}]: The model formula used for the alternative hypothesis in the ANODEV analysis.
     \item[\texttt{effect.formula}]: The model formula used for estimating the effect size and parameter estimates.
     \item[\texttt{geneLevel.formula}]: The model formula used for estimating the gene-level expression.
+    \item[\texttt{gene.names}]: A text file or data.frame that contains two columns: first the geneID then the gene name. Many annotations do not use common gene symbols as the primary identifier for genes. However, full ensembl ID's (for example) are not suited to easy recognition or conversation. Thus, you can use this additional parameter to tell JunctionSeq to use the common gene names in tables and plots (while still using the geneIDs for identification purposes).
   \end{description} 
 
 A full description of all these options and how they are used can be accessed using the command:
@@ -440,12 +441,22 @@ design <- data.frame(condition = factor(decoder$group.ID));
 @
 Note: the experimental condition variable MUST be named "condition".
 
+Just to demonstrate one advanced feature: We can include the gene names file. This text file contains two columns: first the geneID then the gene name. Many annotations do not use common gene symbols as the primary identifier for genes. However, full ensembl ID's (for example) are not suited to easy recognition or conversation. Thus, you can use this additional parameter to tell JunctionSeq to use the common gene names in tables and plots (while still using the geneIDs for identification purposes).
+
+<<testStep0b, results="hide", warning=FALSE>>=
+geneID.to.symbol.file <- system.file(
+                            "extdata/annoFiles/ensid.2.symbol.txt",
+                            package="JctSeqData", 
+                            mustWork=TRUE);
+@
+
 Next, the data must be loaded into a JunctionSeqCountSet:
 <<testStep1, results="hide", warning=FALSE>>=
 jscs = readJunctionSeqCounts(countfiles = countFiles,
                              samplenames = decoder$sample.ID,
                              design = design,
-                             flat.gff.file = gff.file
+                             flat.gff.file = gff.file,
+                             gene.names = geneID.to.symbol.file
                              );
 @
 

diff --git a/JunctionSeq_1.1.10.tar.gz b/JunctionSeq_1.1.10.tar.gz
diff --git a/JunctionSeq_1.1.16.tar.gz b/JunctionSeq_1.1.16.tar.gz
diff --git a/R-CMD-BiocCheck-LINUX-R-v3.2.2.log b/R-CMD-BiocCheck-LINUX-R-v3.2.2.log
@@ -1,4 +1,4 @@
------ R CMD BiocCheck (Fri Feb 19 15:18:19 EST 2016) ----- 
+----- R CMD BiocCheck (Tue Mar  1 10:09:22 EST 2016) ----- 
 R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
 Copyright (C) 2015 The R Foundation for Statistical Computing
 Platform: x86_64-pc-linux-gnu (64-bit)
@@ -9,7 +9,7 @@ GNU General Public License versions 2 or 3.
 For more information about these matters see
 http://www.gnu.org/licenses/.
 
------ R CMD BiocCheck STARTING (Fri Feb 19 15:18:19 EST 2016) -----
+----- R CMD BiocCheck STARTING (Tue Mar  1 10:09:22 EST 2016) -----
 * This is BiocCheck, version 1.6.1.
 * BiocCheck is a work in progress. Output and severity of issues may
   change.
@@ -27,9 +27,9 @@ http://www.gnu.org/licenses/.
       strheight, plot.new, par, plot.window, rect, text, lines, par,
       plot.new, plot.window, title, segments, legend, rect, abline,
       text, strheight, strwidth, points, strheight, segments, rect,
-      strwidth, text, par, lines, formula' (used in JS.arrowChars,
-      JS.axis, JS.perGeneQValueBySimulation, JunctionSeqHTML,
-      adapted.estimateDispersionsMAP,
+      strwidth, text, par, lines, formula, terms' (used in
+      JS.arrowChars, JS.axis, JS.perGeneQValueBySimulation,
+      JunctionSeqHTML, adapted.estimateDispersionsMAP,
       adapted.estimateDispersionsPriorVar,
       adapted.estimateDispersionsPriorVar , adapted.localDispersionFit
       , adapted.parametricDispersionFit, advlines, applyByDimnameOLD,
@@ -48,7 +48,7 @@ http://www.gnu.org/licenses/.
       , generateAllExpressionEstimates.v2, get.character.dim,
       get.filtered.padjust, getAllData2, getLogFoldChangeFromModel,
       getPlottingDeviceFunc , logConditionalLikelihood ,
-      makeGeneLevelAxis, makevstaxis, plotDispEsts,
+      makeGeneLevelAxis, makevstaxis, mapGeneNames, plotDispEsts,
       plotJunctionSeqResultsForGene, plotMA, plotMA_HELPER,
       plotTranscriptsOnly, plotting.limits, qorts.axis.break,
       readAnnotationData, readGeneInfo, readJunctionSeqCounts,
@@ -63,7 +63,7 @@ http://www.gnu.org/licenses/.
       within().
 * Checking vignette directory...
 * This is a software package, checking vignette directories...
-* # of chunks: 37, # of eval=FALSE: 13 (35%)
+* # of chunks: 38, # of eval=FALSE: 13 (34%)
 * Checking version number...
 * Checking version number validity...
     * RECOMMENDED: y of x.y.z version should be even in release
@@ -100,11 +100,11 @@ See http://bioconductor.org/developers/how-to/biocViews/
 * Checking function lengths.............................
 * The longest function is 743 lines long
 * The longest 5 functions are:
-* plotJunctionSeqResultsForGene() (R/main.plotGenes.R, line 572): 743
+* plotJunctionSeqResultsForGene() (R/main.plotGenes.R, line 575): 743
   lines
-* JunctionSeqHTML() (R/file.output.R, line 16): 397 lines
-* makeAllPlotPages() (R/file.output.R, line 519): 314 lines
-* buildAllPlots() (R/main.plotGenes.R, line 18): 267 lines
+* JunctionSeqHTML() (R/file.output.R, line 16): 429 lines
+* makeAllPlotPages() (R/file.output.R, line 555): 312 lines
+* buildAllPlots() (R/main.plotGenes.R, line 18): 270 lines
 * drawPlot() (R/plotting.helpers.R, line 234): 235 lines
 * Checking man pages...
 * Checking exported objects have runnable examples...
@@ -114,11 +114,11 @@ See http://bioconductor.org/developers/how-to/biocViews/
 * Checking package NEWS...
 * Checking formatting of DESCRIPTION, NAMESPACE, man pages, R source,
   and vignette source...
-    * CONSIDER: Shortening lines; 2293 lines (17%) are > 80 characters
+    * CONSIDER: Shortening lines; 2319 lines (17%) are > 80 characters
       long.
     * CONSIDER: Replacing tabs with 4 spaces; 11 lines (0%) contain
       tabs.
-    * CONSIDER: Indenting lines with a multiple of 4 spaces; 7193 lines
+    * CONSIDER: Indenting lines with a multiple of 4 spaces; 7284 lines
       (54%) are not.
   See http://bioconductor.org/developers/how-to/coding-style/
 * Checking for canned comments in man pages...
@@ -128,4 +128,4 @@ Error in curl::curl_fetch_memory(url, handle = handle) :
   Timeout was reached
 Calls: <Anonymous> ... request_fetch -> request_fetch.write_memory -> <Anonymous> -> .Call
 Execution halted
------ R CMD BiocCheck COMPLETE (Fri Feb 19 15:19:52 EST 2016) -----
+----- R CMD BiocCheck COMPLETE (Tue Mar  1 10:10:41 EST 2016) -----