-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPRS_LDpred_genotyped1.sh
executable file
·196 lines (173 loc) · 6.74 KB
/
PRS_LDpred_genotyped1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/usr/bin/env bash
#Use genotyped data
#add beacon into the meta, only work on EA
#https://choishingwan.github.io/PRS-Tutorial/ldpred/
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4596916/
#https://biostat0903.github.io/DBSLMM/Scripts.html
plink=/fh/fast/stanford_j/Xiaoyu/Tools/plink-1.07-x86_64/plink1.9/plink
#extract beacon data, target data---
#step 1----
#remove ambiguous SNPs
#genotype A/B
#prefix=/fh/fast/dai_j/BEACON/BeagessCambridgeAmos/bca_filtered_19Feb2015
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018
prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018_flip
#ml Python #to use snpflip
snpflip=/fh/fast/dai_j/CancerGenomics/Tools/python3/snpflip/snpflip
reference=/fh/fast/stanford_j/Xiaoyu/Tools/reference/human_g1k_v37.fasta
$snpflip -b $prefix.bim -f $reference -o $prefix1
FILESIZE=$(stat -c%s ${prefix1}.reverse)
#flip
if [[ $FILESIZE -gt 0 ]];then
$plink --bfile $prefix --flip ${prefix1}.reverse --make-bed --out ${prefix1} --memory 1000
else
cp $prefix.bed ${prefix1}.bed
cp $prefix.bim ${prefix1}.bim
cp $prefix.fam ${prefix1}.fam
fi
#remove variants on ambiguous strand
FILESIZE1=$(stat -c%s ${prefix1}.ambiguous)
if [[ $FILESIZE1 -gt 0 ]];then
$plink --bfile $prefix1 --exclude ${prefix1}.ambiguous --make-bed --out ${prefix1}_noambiguous
fi
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/bca_filtered_30Nov2018_flip_noambiguous
prefix1=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/beacon_filtered_30Nov2018_flip_noambiguous
$plink --bfile $prefix \
--keep ../result/beaconsamples_plink.txt --make-bed --out $prefix1
#samples with extreme heterozygosity are typically removed prior to downstream analyses
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/beacon_filtered_30Nov2018_flip_noambiguous
#QC
$plink \
--bfile ${prefix} \
--maf 0.05 \
--hwe 1e-6 \
--geno 0.01 \
--mind 0.01 \
--write-snplist \
--make-just-fam \
--out ${prefix}_QC
$plink \
--bfile ${prefix} \
--keep ${prefix}_QC.fam \
--extract ${prefix}_QC.snplist \
--indep-pairwise 200 50 0.25 \
--out ${prefix}_QC
$plink \
--bfile ${prefix} \
--extract ${prefix}_QC.prune.in \
--keep ${prefix}_QC.fam \
--het \
--out ${prefix}_QC
#Individuals that have a first or second degree relative in the sample (π>0.125) can be removed with the following command:
prefix="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/beacon_filtered_30Nov2018_flip_noambiguous"
$plink \
--bfile $prefix \
--extract ${prefix}_QC.prune.in \
--keep ${prefix}_QC.valid.sample \
--rel-cutoff 0.125 \
--out ${prefix}_QC
prefix="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/beacon_filtered_30Nov2018_flip_noambiguous"
prefix1="/fh/fast/dai_j/BEACON/BEACON_GRANT/result/beacon_filtered_30Nov2018_flip_noambiguous_QC"
$plink --bfile $prefix \
--keep ../result/beaconsamples_plink.txt --make-bed --out $prefix1
#step 3----
$plink \
--bfile ${prefix}_QC \
--keep ${prefix}_QC.valid.sample \
--make-bed \
--out ${prefix1}
prefix=/fh/fast/dai_j/BEACON/BEACON_GRANT/result/beacon_filtered_30Nov2018_flip_noambiguous_QC
$plink --bfile $prefix \
--keep ${prefix}.rel.id --make-bed --out $prefix
#make BE/EA/BEEA files using PRS_LDpred.R
#work on base data using PRS_LDpred.R
#install ldpred
#ml Python/3.6.6-foss-2018b
python -m venv env
source ./env/bin/activate
python -m pip install --user ldpred
#If you want to switch projects or otherwise leave your virtual environment, simply run:
#deactivate
#If you want to re-enter the virtual environment just follow the same instructions above about activating a virtual environment. There’s no need to re-create the virtual environment.
# #ldpred=/fh/fast/dai_j/CancerGenomics/Tools/python3/lib/python3.6/site-packages/LDpred-1.0.11-py3.6.egg/ldpred/LDpred_fast.py
# #N numer of samples in base data
# ldpred coord \
# --rs MarkerName \
# --A1 Allele1 \
# --A2 Allele2 \
# --pos pos \
# --chr chr \
# --pval P \
# --eff Effect \
# --ssf-format CUSTOM \
# --N 14229 \
# --ssf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Bonn_Oxford_Cambridge_metastat.txt.gz \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.coord \
# --gf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE
#
# nsnp=$(wc -l /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.bim) #4937535 /3000=1645
# read -r -a tmp <<< "$nsnp"
# nldr=$((${tmp[0]}/3000))
# ldpred gibbs \
# --cf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.coord \
# --ldr $nldr \
# --ldf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.ld \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.weight \
# --N 14229
#
# ldpred score \
# --gf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.ldpred \
# --rf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.weight \
# --out /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.score \
# --pf /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE.pheno \
# --pf-format LSTANDARD
run_ldpred () {
meta="$1" #/fh/fast/dai_j/BEACON/BEACON_GRANT/result/Bonn_Oxford_Cambridge_metastat.txt.gz
target="$2" #/fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BE
N="$3" #sample size of base data
echo $target
# echo "coord"
# if [[ -f $target.coord ]] ; then rm $target.coord; fi
# rm $target.weight*.txt
# rm $target.score*.txt
# rm $target.score*.txt.adj
# rm $target.ld*.gz
ldpred coord \
--rs MarkerName \
--A1 Allele1 \
--A2 Allele2 \
--pos pos \
--chr chr \
--pval P \
--eff Effect \
--eff_type LOGOR \
--se StdErr \
--ssf-format CUSTOM \
--N $N \
--ssf $meta \
--out $target.coord \
--gf $target
echo "gibbs"
nsnp=$(wc -l $target.bim) #4937535 /3000=1645
read -r -a tmp <<< "$nsnp"
nldr=$((${tmp[0]}/3000))
ldpred gibbs \
--cf $target.coord \
--ldr $nldr \
--ldf $target.ld \
--f 1 0.3 0.1 0.03 0.01 \
--out $target.weight \
--N $N
#f is the probability that a marker is drawn from a Gaussian distribution, i.e., the fraction of causal markers.
echo "score"
ldpred score \
--gf $target \
--rf $target.weight \
--cov-file $target.covariate \
--f 1 0.3 0.1 0.03 0.01 \
--out $target.score \
--pf $target.pheno \
--pf-format LSTANDARD
}
run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/EA_Beacon_Bonn_Cambridge_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_EA_genotyped1 13246 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_Beacon_EA_genotyped1.log &
# run_ldpred /fh/fast/dai_j/BEACON/BEACON_GRANT/result/BEEA_Bonn_Cambridge_metastat.txt.gz /fh/fast/dai_j/BEACON/BEACON_GRANT/result/Beacon_BEEA_genotyped 11459 &> /fh/fast/dai_j/BEACON/BEACON_GRANT/result/LDpred_Beacon_BEEA_genotyped.log &