-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPRS_LDpred_cambridge.sh
executable file
·162 lines (136 loc) · 5.11 KB
/
PRS_LDpred_cambridge.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env bash
#only work on EA
#use Cambridge+BEACONcontrol as the validation, only work on EA
#BEACONcase+AMOS+other BEACONcontrol for discovery
#https://choishingwan.github.io/PRS-Tutorial/ldpred/
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4596916/
#https://biostat0903.github.io/DBSLMM/Scripts.html
plink=/fh/fast/stanford_j/Xiaoyu/Tools/plink-1.07-x86_64/plink1.9/plink
#use imputed cambamos data
#extract cambamos data, target data---
#step1--------------
rm ../result/BCA1000gnoambiguousmergelist.txt
for chr in {1..22}
do
echo /fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/chr${chr}_filter_noambiguous >> ../result/BCA1000gnoambiguousmergelist.txt
done
$plink --merge-list ../result/BCA1000gnoambiguousmergelist.txt --make-bed \
--out /fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
#do GWAS on BEACON
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
$plink --bfile $prefix \
--keep ../result/GWAS/Discovery_EA_CO_selectedsamples_plink.txt --make-bed \
--out ../result/GWAS/Discovery_EA_CO_gwas
#update_fam() in R
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/GWAS/Beacon_EA_CO_gwas
$plink --bfile $prefix --covar ../result/GWAS/Beacon_EA_CO_selectedsamples_pheno_plink.txt \
--covar-name pc1,pc2,pc3,pc4,sex,age --logistic --beta --hide-covar --ci 0.95 --out $prefix
#working on validation data
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/bca_filter_noambiguous
prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
$plink --bfile $prefix \
--keep ../result/validationsamples_plink.txt --make-bed --out $prefix1
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
#QC
$plink \
--bfile ${prefix} \
--maf 0.05 \
--hwe 1e-6 \
--geno 0.01 \
--mind 0.01 \
--write-snplist \
--make-just-fam \
--out ${prefix}_QC
$plink \
--bfile ${prefix} \
--keep ${prefix}_QC.fam \
--extract ${prefix}_QC.snplist \
--indep-pairwise 200 50 0.25 \
--out ${prefix}_QC
$plink \
--bfile ${prefix} \
--extract ${prefix}_QC.prune.in \
--keep ${prefix}_QC.fam \
--het \
--out ${prefix}_QC
#Individuals that have a first or second degree relative in the sample (π>0.125) can be removed with the following command:
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
$plink \
--bfile $prefix \
--extract ${prefix}_QC.prune.in \
--keep ${prefix}_QC.valid.sample \
--rel-cutoff 0.125 \
--out ${prefix}_QC
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous
prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous_QC
$plink --bfile $prefix \
--keep ../result/validationsamples_plink.txt --make-bed --out $prefix1
$plink \
--bfile ${prefix1} \
--keep ${prefix1}.valid.sample \
--make-bed \
--out ${prefix1}
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/data/imputation/bca_1000g/validation_filter_noambiguous_QC
$plink --bfile $prefix \
--keep ${prefix}.rel.id --make-bed --out $prefix
#step7----
#make BE/EA/BEEA files using PRS_LDpred.R
#work on base data using PRS_LDpred.R
#install ldpred
#ml Python
#python -m venv env
#source ./env/bin/activate
#python -m pip install ldpred
#If you want to switch projects or otherwise leave your virtual environment, simply run:
#deactivate
#A1:effective allele,--only-hm3
run_ldpred () {
meta="$1" #/fh/fast/dai_j/BEACON/BEACON_GRANT/result/Bonn_Oxford_Cambridge_metastat.txt.gz
target="$2" #/fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE
N="$3" #sample size of base data
echo $target
# if [[ -f $target.coord ]] ; then rm $target.coord; fi
# rm $target.weight*.txt
# #rm $target.score*.txt
# #rm $target.score*.txt.adj
# rm $target.ld*.gz
# echo "coord"
# ldpred coord \
# --rs MarkerName \
# --A1 Allele1 \
# --A2 Allele2 \
# --pos pos \
# --chr chr \
# --pval P \
# --only-hm3 \
# --eff Effect \
# --ssf-format CUSTOM \
# --eff_type LOGOR \
# --se StdErr \
# --N $N \
# --ssf $meta \
# --out $target.coord \
# --gf $target
echo "gibbs"
nsnp=$(wc -l $target.bim) #4937535 /3000=1645
read -r -a tmp <<< "$nsnp"
nldr=$((${tmp[0]}/3000))
ldpred gibbs \
--cf $target.coord \
--ldr $nldr \
--ldf $target.ld \
--f 0.003 0.001 \
--out $target.weight \
--N $N
#f is the probability that a marker is drawn from a Gaussian distribution, i.e., the fraction of causal markers.
echo "score"
ldpred score \
--gf $target \
--rf $target.weight \
--cov-file $target.covariate \
--f 0.003 0.001 \
--out $target.score \
--pf $target.pheno \
--pf-format LSTANDARD
}
run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/EA_Discovery_Bonn_imp_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Validation_EA 8177 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_Validation_EA1.log &