-
Notifications
You must be signed in to change notification settings - Fork 0
/
ChIP_GSM_K562_demo.R
107 lines (86 loc) · 4.74 KB
/
ChIP_GSM_K562_demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
rm(list = ls())
graphics.off()
library(igraph)
library(prodlim)
setwd('Package_K562_promoter_demo')
source('Package_K562_promoter_demo/ChIP_GSM_functions.R')
load('K562_R_demo.Rdata')
print('ChIP-GSM Model parameter initialization!')
# ChIP-GSM model initialization
Gibbs_round=1000
Fold_change_factor=10
Num_TFs=length(Sampling_input_data_frame$Candidate_TFs)
Num_clusters=nrow(Sampling_input_data_frame$C_candidate_matrix)
Num_windows = length(Sampling_input_data_frame$Sampling_window_ID )
TX=matrix(0, nrow=Num_TFs, ncol=1)
TI=matrix(0, nrow=Num_TFs, ncol=1)
Binding_pattern=matrix(0, nrow=Num_windows, ncol=Num_TFs)
Cluster_pattern=matrix(0, nrow=Num_windows, ncol=Num_clusters)
# randomly assign each window or region a regulatory module
for (k in 1:Num_windows){
candidate_cluster=which(Sampling_input_data_frame$Cluster_mapping[k,]>0)
rand_index=sample(length(candidate_cluster), 1)
Binding_pattern[k,]=Sampling_input_data_frame$C_candidate_matrix[candidate_cluster[rand_index[1]],]
Cluster_pattern[k,candidate_cluster[rand_index[1]]]=1
}
# determine the total number of read tags to be assigned to forgournd and background regions respectively for each TF
for (t in 1:Num_TFs){
binding_index=which(Binding_pattern[,t]>0)
Background_sampling_weight = rgamma(Num_windows, shape=alpha_I[t], scale=beta_I[t])
Forground_sampling_weight = Background_sampling_weight
Forground_sampling_weight[binding_index] = Fold_change_factor*Forground_sampling_weight[binding_index]
Forground_sampling_weight = Forground_sampling_weight/(sum(Forground_sampling_weight))
TX[t]=floor(sum(Forground_sampling_weight[binding_index])*Sampling_input_data_frame$TF_count_summary[t])
TI[t]=Sampling_input_data_frame$TF_count_summary[t]-TX[t]
}
# noise variance
alpha_N=2
beta_N=5
Nsigma=1/rgamma(1, shape=alpha_N, scale=1/beta_N)
Summition_binding=Binding_pattern
Summition_cluster=Cluster_pattern
Binding_pattern_old=Binding_pattern
# binding piror of distance
window_size=500
promoter_size=10000
Background_prior=matrix(1, nrow=nrow(Binding_pattern), ncol=ncol(Binding_pattern))
alpha_lambda=1
beta_lambda=1
Binding_prior<-sampling_lambda_distance(Sampling_input_data_frame, Binding_pattern, alpha_lambda, beta_lambda,
Num_windows, Num_TFs, window_size, promoter_size)
print('Sampling starts!')
# for loop for sampling
for (g in 1:1000){
print(paste('Sampling round:',toString(g)))
#***********************sampling X based on binding
X <- sampling_X_cluster_distance(Sampling_input_data_frame, Binding_pattern_old, TX, Xmin, Nsigma, Xupper,
Pdfx_PowerLaw, Binding_prior, Num_windows, Num_TFs)
#***********************sampling I based on none binding
I <- sampling_I_cluster_distance(Sampling_input_data_frame, Binding_pattern_old, TI, Nsigma, Xupper,
Pdfi_Gamma, Background_prior, Num_windows, Num_TFs)
#***********************sampling noise based on sampled X and I
Nsigma <- sampling_Nsigma_cluster(Sampling_input_data_frame, X, I, alpha_N, beta_N, Num_windows, Num_TFs)
print(Nsigma)
# sampling clusters and update bindings
Sampling_temp_results <- sampling_binding_cluster_distance(Sampling_input_data_frame, Binding_pattern_old, X, Xmin, Xupper, I, Nsigma,
Pdfx_PowerLaw, Pdfi_Gamma, Binding_prior, Background_prior,
Num_windows, Num_TFs, Num_clusters)
# sampling distance parameter lambda
Binding_prior<-sampling_lambda_distance(Sampling_input_data_frame, Sampling_temp_results$Binding_pattern, alpha_lambda, beta_lambda,
Num_windows, Num_TFs, window_size, promoter_size)
# control all tags used for sampling
Read_count_total<-sampling_TX_TI_cluster(Sampling_input_data_frame, Fold_change_factor, alpha_I, beta_I, Sampling_temp_results$Binding_pattern, Num_TFs)
TX<-Read_count_total$TX
TI<-Read_count_total$TI
# samples accumulation
print('Propotion of all candidate positions:')
print(sum(Sampling_temp_results$Binding_pattern)/sum(Sampling_input_data_frame$Sampling_window_flag))
print('Propotion of previous round of candidate positions:')
print(sum(Sampling_temp_results$Binding_pattern*Binding_pattern_old)/sum(Binding_pattern_old))
Binding_pattern_old=Sampling_temp_results$Binding_pattern
Summition_binding=Summition_binding+Sampling_temp_results$Binding_pattern
Summition_cluster=Summition_cluster+Sampling_temp_results$Cluster_pattern
if (g%%10==1){
save(Summition_binding, Summition_cluster, file='K562_temp_results_lambda_noise.Rdata')
}
}