-
Notifications
You must be signed in to change notification settings - Fork 0
/
MPPC_cor.r
81 lines (57 loc) · 2.77 KB
/
MPPC_cor.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/Rscript
#### test correlation between parallel chains of peaky_fs
#### update to run all LIBs
# load
library(peaky)
library(data.table)
# functions
MPPC_cor = function(result_A, result_B){
cor(result_A$rjmcmc_pos, result_B$rjmcmc_pos)
}
MPPC_cor_by_dir = function(baits_rjmcmc_dir_A, baits_rjmcmc_dir_B, baits_dir, cor_threshold=.75){
files_A = grep("bait_rjmcmc_[0-9]+.rds$",list.files(baits_rjmcmc_dir_A),value=TRUE)
files_B = grep("bait_rjmcmc_[0-9]+.rds$",list.files(baits_rjmcmc_dir_B),value=TRUE)
if (!dir.exists(baits_dir)) {
stop("Baits directory not found.")
}
L = paste0(baits_dir, "/MPPCcor_log.txt")
peaky:::note(L,TRUE,"Calculating MPPC cor for interpreted peaky results in:\nA: ",
baits_rjmcmc_dir_A,"\nB: ",baits_rjmcmc_dir_B)
files_all = union(files_A,files_B)
files_both = intersect(files_A,files_B)
paths_A = paste0(baits_rjmcmc_dir_A,"/",files_both)
paths_B = paste0(baits_rjmcmc_dir_B,"/",files_both)
peaky:::note(L,FALSE,"Unique bait results across A and B: ",length(files_all),
"\nBait results in both A and B: ", length(files_both))
peaky:::note(L,TRUE,"Loading common results and calculating MPPC...")
results_A = lapply(paths_A,readRDS)
results_B = lapply(paths_B,readRDS)
cors = mapply(MPPC_cor, results_A, results_B)
cor_table = data.table(results_file=files_both, MPPC_cor=cors)
file_table = data.table(results_file=files_all, in_A=files_all%in%files_A, in_B=files_all%in%files_B,
bait_path=paste0(baits_dir,"/",gsub("rjmcmc_","",files_all)))
file_cor_table = merge(file_table, cor_table, by="results_file", all.x = TRUE)
file_cor_table[,redo:=!is.na(MPPC_cor) & MPPC_cor<cor_threshold]
table_path = paste0(baits_dir,"/","MPPCcor.csv")
baitlist_path = paste0(baits_dir,"/","baitlist_MPPCcor_sub_",cor_threshold,".txt")
peaky:::note(L,TRUE,"Saving MPPC correlation overview:\n",table_path,
"\nSaving baitlist for baits with MPPC correlation below ",cor_threshold,":\n",baitlist_path)
fwrite(file_cor_table, table_path)
write(file_cor_table[redo==TRUE,bait_path], baitlist_path)
peaky:::note(L,FALSE,"Done.")
return(file_cor_table)
}
# CHIC samples
sample_list = c("B80T5", "Hs578T", "MCF10A", "MCF7", "MDAMB231", "T47D")
for(k in c("SCHIC")){
run_dir = paste0("/working/lab_georgiat/jonathB/CaptureHiC/peaky/peaky_fs/peaky_chain_runs/",k,"_split_baits/")
for(i in sample_list){
# use full paths
baits_rjmcmc_dir_A = paste0(run_dir,i,"/baits_rjmcmc1/")
baits_rjmcmc_dir_B = paste0(run_dir,i,"/baits_rjmcmc2/")
baits_dir = paste0(run_dir,i,"/baits/")
cor_threshold = 0.75
# run
MPPC_cor_by_dir(baits_rjmcmc_dir_A, baits_rjmcmc_dir_B, baits_dir, 0.75)
}
}