forked from Crazzy-Rabbit/Script-in-PopGenetics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path提取区间内SNP位点信息.txt
47 lines (33 loc) · 1.74 KB
/
提取区间内SNP位点信息.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
1、输入文件
# 位置文件(基因染色体区间坐标文件,chr_position.txt)
1 15645891 15646070
1 15646256 15646447
1 15646534 15646644
#若位置文件在window下编辑过,则用下列命令替换换行符^m使用ctrl v + ctrl m 组合键吗,不要直接打出
sed -e 's/^M/\n/g' position.txt > position2.txt
# vcf文件
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CF2
1 112 94 . T C 33689.1 . AC=1;AF=0.472;AN=2;BaseQRankSum=-0.076;DP=3953;ExcessHe
2 1123665 547 . A G 67292.8 . AC=1;AF=0.489;AN=2;BaseQRankSum=0;DP=4273;ExcessHet=129
5 112553 1245 . A T 69849.7 . AC=1;AF=0.466;AN=2;BaseQRankSum=-0.339;DP=4231;ExcessHe
5 142554 2086 . C G 80488.5 . AC=1;AF=0.506;AN=2;Base
7 1111125 2334 . A G 42490.1 . AC=1;AF=0.466;AN=2;BaseQRankSum=-0.028;DP=3806;ExcessHe
10 1111445 2335 . A T 53023.1 . AC=1;AF=0.466;AN=2;BaseQRankSum=0.199;DP=3835;ExcessHet
2、bash脚本
cat $1 |while read id;
do echo $id
arr=($id)
chr=${arr[0]}
from=${arr[1]}
to=${arr[2]}
echo $chr $from $to
if echo $2 |grep -q '.*.vcf.gz$';then
vcftools --gzvcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out chr$chr.$from-$to
elif echo $2 |grep -q '.*.vcf$';then
vcftools --vcf $2 --chr $chr --from-bp $from --to-bp $to --recode --recode-INFO-all --out chr$chr.$from-$to
fi
done
3、运行
bash Chr_position_tk_snp.sh chr_position.txt test.vcf
4、合并结果
cat chr*.recode.vcf |grep -v '#'> all_select_snp.txt