Skip to content

Commit 025a0d7

Browse files
authored
Fix EdgeR: Sanitise base name for files when coming from contrasts or factors (galaxyproject#5549)
* Sanitise base name for files when coming from contrasts or factors
* Bump version, tests and less restrictive set
* Please lintr
1 parent cd62639 commit 025a0d7

File tree

3 files changed

+34
-22
lines changed

3 files changed

+34
-22
lines changed

tools/edger/edger.R

+25-19
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ unmake_names <- function(string) {
8585
return(string)
8686
}
8787

88+
# Sanitise file base names coming from factors or contrasts
# Replaces every "/" and "^" in `string` with "_" and returns the result;
# all other characters are left untouched.
# "/" would otherwise act as a path separator once make_out() joins the name
# onto out_path. NOTE(review): the reason "^" is also replaced is not visible
# here - confirm before widening or narrowing the character set.
89+
sanitise_basename <- function(string) {
90+
# Inside the bracket expression "^" is not first, so it matches a literal caret
string <- gsub("[/^]", "_", string)
91+
return(string)
92+
}
93+
8894
# Generate output folder and paths
8995
make_out <- function(filename) {
9096
return(paste0(out_path, "/", filename))
@@ -331,16 +337,16 @@ ql_png <- make_out("qlplot.png")
331337
mds_pdf <- character() # Initialise character vector
332338
mds_png <- character()
333339
for (i in seq_len(ncol(factors))) {
334-
mds_pdf[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".pdf"))
335-
mds_png[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".png"))
340+
mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
341+
mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
336342
}
337343
md_pdf <- character()
338344
md_png <- character()
339345
top_out <- character()
340346
for (i in seq_along(contrast_data)) {
341-
md_pdf[i] <- make_out(paste0("mdplot_", contrast_data[i], ".pdf"))
342-
md_png[i] <- make_out(paste0("mdplot_", contrast_data[i], ".png"))
343-
top_out[i] <- make_out(paste0("edgeR_", contrast_data[i], ".tsv"))
347+
md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
348+
md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
349+
top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
344350
} # Save output paths for each contrast as vectors
345351
norm_out <- make_out("edgeR_normcounts.tsv")
346352
rda_out <- make_out("edgeR_analysis.RData")
@@ -446,15 +452,15 @@ labels <- names(counts)
446452
# MDS plot
447453
png(mds_png, width = 600, height = 600)
448454
plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
449-
img_name <- paste0("MDS Plot_", names(factors)[1], ".png")
450-
img_addr <- paste0("mdsplot_", names(factors)[1], ".png")
455+
img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".png")
456+
img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".png")
451457
image_data[1, ] <- c(img_name, img_addr)
452458
invisible(dev.off())
453459

454460
pdf(mds_pdf)
455461
plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
456-
link_name <- paste0("MDS Plot_", names(factors)[1], ".pdf")
457-
link_addr <- paste0("mdsplot_", names(factors)[1], ".pdf")
462+
link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".pdf")
463+
link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".pdf")
458464
link_data[1, ] <- c(link_name, link_addr)
459465
invisible(dev.off())
460466

@@ -463,15 +469,15 @@ if (ncol(factors) > 1) {
463469
for (i in 2:ncol(factors)) {
464470
png(mds_png[i], width = 600, height = 600)
465471
plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
466-
img_name <- paste0("MDS Plot_", names(factors)[i], ".png")
467-
img_addr <- paste0("mdsplot_", names(factors)[i], ".png")
472+
img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
473+
img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
468474
image_data <- rbind(image_data, c(img_name, img_addr))
469475
invisible(dev.off())
470476

471477
pdf(mds_pdf[i])
472478
plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
473-
link_name <- paste0("MDS Plot_", names(factors)[i], ".pdf")
474-
link_addr <- paste0("mdsplot_", names(factors)[i], ".pdf")
479+
link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
480+
link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
475481
link_data <- rbind(link_data, c(link_name, link_addr))
476482
invisible(dev.off())
477483
}
@@ -549,8 +555,8 @@ for (i in seq_along(contrast_data)) {
549555
top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
550556
write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
551557

552-
link_name <- paste0("edgeR_", contrast_data[i], ".tsv")
553-
link_addr <- paste0("edgeR_", contrast_data[i], ".tsv")
558+
link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
559+
link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
554560
link_data <- rbind(link_data, c(link_name, link_addr))
555561

556562
# Plot MD (log ratios vs mean difference) using limma package
@@ -564,8 +570,8 @@ for (i in seq_along(contrast_data)) {
564570

565571
abline(h = 0, col = "grey", lty = 2)
566572

567-
link_name <- paste0("MD Plot_", contrast_data[i], ".pdf")
568-
link_addr <- paste0("mdplot_", contrast_data[i], ".pdf")
573+
link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
574+
link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
569575
link_data <- rbind(link_data, c(link_name, link_addr))
570576
invisible(dev.off())
571577

@@ -579,8 +585,8 @@ for (i in seq_along(contrast_data)) {
579585

580586
abline(h = 0, col = "grey", lty = 2)
581587

582-
img_name <- paste0("MD Plot_", contrast_data[i], ".png")
583-
img_addr <- paste0("mdplot_", contrast_data[i], ".png")
588+
img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
589+
img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
584590
image_data <- rbind(image_data, c(img_name, img_addr))
585591
invisible(dev.off())
586592
}

tools/edger/edger.xml

+7-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
</description>
55
<macros>
66
<token name="@TOOL_VERSION@">3.36.0</token>
7-
<token name="@VERSION_SUFFIX@">3</token>
7+
<token name="@VERSION_SUFFIX@">4</token>
88
</macros>
99
<edam_topics>
1010
<edam_topic>topic_3308</edam_topic>
@@ -694,7 +694,7 @@ cp '$outReport.files_path'/*.tsv output_dir/
694694
<param name="cinfo" value="contrasts_file.txt"/>
695695
<param name="formula" value="~ 0 + Genotype + Batch"/>
696696
<param name="normalisationOption" value="TMM"/>
697-
<output_collection name="outTables" count="2">
697+
<output_collection name="outTables" count="3">
698698
<element name="edgeR_Mut-WT" ftype="tabular">
699699
<assert_contents>
700700
<has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/>
@@ -706,6 +706,11 @@ cp '$outReport.files_path'/*.tsv output_dir/
706706
<has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/>
707707
</assert_contents>
708708
</element>
709+
<element name="edgeR_(2*Mut_3*WT)-WT" ftype="tabular">
710+
<assert_contents>
711+
<has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/>
712+
</assert_contents>
713+
</element>
709714
</output_collection>
710715
</test>
711716
</tests>
+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
Contrasts
22
Mut-WT
3-
WT-Mut
3+
WT-Mut
4+
(2*Mut/3*WT)-WT

0 commit comments

Comments
 (0)