-
Notifications
You must be signed in to change notification settings - Fork 3
/
assess_significance.R
72 lines (58 loc) · 2.78 KB
/
assess_significance.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env Rscript
library(rtracklayer)
args <- commandArgs()
dataTable <- read.table(args[5], header = TRUE)
ratio <- data.frame(dataTable)
dataTable <- read.table(args[4], header = FALSE)
cnvs <- data.frame(dataTable)
ratio$Ratio[which(ratio$Ratio == -1)] <- NA
cnvs.bed <- GRanges(cnvs[, 1], IRanges(cnvs[, 2], cnvs[, 3]))
ratio.bed <- GRanges(ratio$Chromosome, IRanges(ratio$Start, ratio$Start), score = ratio$Ratio)
overlaps <- subsetByOverlaps(ratio.bed, cnvs.bed)
normals <- setdiff(ratio.bed, cnvs.bed)
normals <- subsetByOverlaps(ratio.bed, normals)
# mu <- mean(score(normals),na.rm=TRUE)
# sigma<- sd(score(normals),na.rm=TRUE)
# hist(score(normals),n=500,xlim=c(0,2))
# hist(log(score(normals)),n=500,xlim=c(-1,1))
# shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
# qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
# qqline(score(normals)[which(!is.na(score(normals)))], col = 2)
# shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
# qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
# qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)
numberOfCol <- length(cnvs)
for (i in c(1:length(cnvs[, 1]))) {
values <- score(subsetByOverlaps(ratio.bed, cnvs.bed[i]))
# wilcox.test(values,mu=mu)
W <- function(values, normals) {
resultw <- try(wilcox.test(values, score(normals)), silent = TRUE)
if (class(resultw) == "try-error") {
return(list("statistic" = NA, "parameter" = NA, "p.value" = NA, "null.value" = NA, "alternative" = NA, "method" = NA, "data.name" = NA))
} else {
resultw
}
}
KS <- function(values, normals) {
resultks <- try(ks.test(values, score(normals)), silent = TRUE)
if (class(resultks) == "try-error") {
return(list("statistic" = NA, "p.value" = NA, "alternative" = NA, "method" = NA, "data.name" = NA))
} else {
resultks
}
}
# resultks <- try(KS <- ks.test(values,score(normals)), silent = TRUE)
# if(class(resultks)=="try-error") NA) else resultks
cnvs[i, numberOfCol + 1] <- W(values, normals)$p.value
cnvs[i, numberOfCol + 2] <- KS(values, normals)$p.value
}
if (numberOfCol == 5) {
names(cnvs) <- c("chr", "start", "end", "copy number", "status", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
}
if (numberOfCol == 7) {
names(cnvs) <- c("chr", "start", "end", "copy number", "status", "genotype", "uncertainty", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
}
if (numberOfCol == 9) {
names(cnvs) <- c("chr", "start", "end", "copy number", "status", "genotype", "uncertainty", "somatic/germline", "precentageOfGermline", "WilcoxonRankSumTestPvalue", "KolmogorovSmirnovPvalue")
}
write.table(cnvs, file = paste(args[4], ".p.value.txt", sep = ""), sep = "\t", quote = F, row.names = F)