-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpreprocess-gcts
executable file
·42 lines (33 loc) · 1.54 KB
/
preprocess-gcts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env Rscript
library(argparser)
library(futile.logger)
library(foreach)
library(assertthat)
library(caret)
source("pre_fit.R")
source("k_fold.R")
source("demeter.R")
# Set the futile.logger threshold to DEBUG; invisible() keeps the call's
# return value from being printed when the script runs.
invisible(flog.threshold(DEBUG))

# Command-line interface: three arguments, declared in the order
# output_file, batches, foldchangegct.
parser <- arg_parser("Consumes multiple GCT files with shRNA fold-change data and performs preprocessing to generate the datastructure that DEMETER uses as input.")
parser <- add_argument(parser, "output_file", "Path to write data post preprocessing")
parser <- add_argument(parser, "batches", "Path to csv file which contains which cell line was contained within which batch")
parser <- add_argument(parser, "foldchangegct", "Comma separated list of gct files containing shRNA fold-change data")
args <- parse_args(parser)

# Split the comma-separated list into individual paths. Whitespace around
# each entry is trimmed and empty entries are dropped, so "a.gct, b.gct" or
# a doubled comma cannot produce a bogus path (an empty path would make
# read.table() read from stdin instead of a file).
fc.mat.filenames <- trimws(strsplit(args[["foldchangegct"]], ",", fixed = TRUE)[[1]])
fc.mat.filenames <- fc.mat.filenames[fc.mat.filenames != ""]
if (length(fc.mat.filenames) == 0) {
  stop("No GCT files were given: \"foldchangegct\" must be a comma separated list of at least one file")
}
batches.filename <- args[["batches"]]
output.filename <- args[["output_file"]]
# Load the batch table. check.names = FALSE keeps the header text as written
# in the file, so the column "DEMETER batch" (which contains a space) can be
# looked up by that exact name below.
batch.per.sample <- read.csv(batches.filename, check.names = FALSE)

# Fail early with an explicit message if either required column is absent.
if (!all(c("DEMETER batch", "Name") %in% colnames(batch.per.sample))) {
  stop(paste0("The batch file \"", batches.filename, "\" must contain columns named \"DEMETER batch\" and \"Name\""))
}

# Replace the table with a vector of "DEMETER batch" values whose names are
# taken from the "Name" column.
batch.per.sample <- setNames(batch.per.sample[["DEMETER batch"]],
                             batch.per.sample[["Name"]])
# Read every fold-change GCT file into a matrix; the result is a list with
# one matrix per input file, in the order the files were given.
fc.matrices <- lapply(fc.mat.filenames, function(gct.path) {
  # The first two lines of the file are skipped (presumably the GCT version
  # and dimension lines - confirm against the file format). The next line is
  # the header, and the first column supplies the row names. Quote and
  # comment processing are disabled so such characters are read literally.
  gct <- read.table(gct.path, as.is = TRUE, header = TRUE, skip = 2,
                    row.names = 1, quote = "", comment.char = "",
                    check.names = FALSE)

  # Discard the first remaining column (presumably the GCT "Description"
  # column - confirm). drop = FALSE keeps a data frame even when a single
  # data column is left; without it the subset collapses to a bare vector
  # and the column name of that lone sample is lost.
  values <- gct[, -1, drop = FALSE]

  fc <- as.matrix(values)
  colnames(fc) <- colnames(values)
  rownames(fc) <- rownames(gct)
  fc
})
# Combine the per-file matrices with the per-sample batch vector.
# merge.data.for.DEMETER is not defined in this script (presumably it comes
# from one of the files loaded with source() - confirm).
m <- merge.data.for.DEMETER(fc.matrices, batch.per.sample)

# Write the merged result to the output path. It is stored under the object
# name "m", which is the name load() will restore it as.
save(list = "m", file = output.filename)