-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathnonredundant.R
43 lines (39 loc) · 1.67 KB
/
nonredundant.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Run the project's package setup script.
# NOTE(review): fread/data.table/fwrite are never attached in this file, so
# packages.R presumably loads data.table -- confirm.
source("packages.R")

# Read the xz-compressed inputs table, decompressing it through xzcat.
dt <- fread(cmd = "xzcat data/systematic/inputs.csv.xz")

# Keep every column except the first and convert the result to a matrix.
m <- as.matrix(dt[, -1, with = FALSE])

# Column j of o is the permutation order() returns for column j of m, so two
# columns of o are equal exactly when order() arranges both inputs identically.
o <- apply(m, MARGIN = 2, FUN = order)
# For each column of o, record the names of the OTHER columns whose ordering
# permutation is identical to it. Columns without any such match get no entry,
# so names(redundant.list) is exactly the set of columns that have a match.
redundant.list <- list()
all.names <- colnames(o)
for (this.name in all.names) {
  # Compare this column against every column of o at once; the vector is
  # recycled down each column of the matrix.
  matches.this <- apply(o == o[, this.name], 2, all)
  twin.names <- all.names[matches.this]
  # A column always matches itself, so drop its own name.
  twin.names <- twin.names[twin.names != this.name]
  if (length(twin.names) == 0) {
    next
  }
  redundant.list[[this.name]] <- twin.names
}
# Columns that have no entry in redundant.list.
feature.names <- colnames(o)
u.vec <- feature.names[!(feature.names %in% names(redundant.list))]

# Reduce the redundancy groups to one name each: take the first remaining
# entry, keep the first of its recorded matches, then drop that entry together
# with every entry named after one of its matches so the group is not visited
# a second time.
start.list <- redundant.list
end.list <- list()
while (length(start.list) > 0) {
  lead.name <- names(start.list)[[1]]
  twin.vec <- start.list[[lead.name]]
  end.list[[length(end.list) + 1]] <- twin.vec[1]
  is.visited <- names(start.list) %in% c(twin.vec, lead.name)
  start.list <- start.list[!is.visited]
}

# Top-level expression whose value is not assigned: the kept group
# representatives followed by the unmatched columns other than "mad".
c(unlist(end.list), u.vec[u.vec != "mad"])
# Hard-coded feature names: five individual names, then log.rss.1 to
# log.rss.20, then log.mse.1 to log.mse.20, then seven more individual names.
u.features <- c(
  "log.hall", "log.n", "log.bases",
  "log.probes.per.base", "log.bases.per.probe",
  paste0("log.rss.", 1:20),
  paste0("log.mse.", 1:20),
  "chr17", "log.mad", "chr1", "chr2", "chr3", "chr4", "chr11"
)
# Write the feature names as a CSV with a single column named "feature".
out.dt <- data.table(feature = u.features)
fwrite(x = out.dt, file = "data/systematic/nonredundant.csv")