forked from Crazzy-Rabbit/Script-in-PopGenetics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Vcf2heatmapFile.py
51 lines (42 loc) · 1.33 KB
/
Vcf2heatmapFile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on 10 05 18:57:48 2023
@Author: Lulu Shi
@Mails: crazzy_rabbit@163.com
"""
import os
import click
import numpy as np
import pandas as pd
def load_vcf(vcf):
    """Extract the ID and sample columns of a VCF into ``tempfile.txt``.

    Every line that does not start with ``##`` is split on tabs, and its
    third field (the VCF ID column) followed by all fields from the tenth
    onward (the per-sample columns) is written as one tab-separated row to
    ``tempfile.txt`` in the current working directory. The ``#CHROM`` header
    line begins with a single ``#``, so it is kept and becomes the first row.

    Args:
        vcf: Iterable of text lines, e.g. an open VCF file handle.
    """
    # The context manager guarantees the rows are flushed and the handle is
    # closed before the temporary file is read back by changesnp().
    with open("tempfile.txt", 'w') as tempout:
        for line in vcf:
            if line.startswith('##'):
                continue  # meta-information lines carry no genotype data
            fields = line.strip().split('\t')
            # fields[2:3] (a slice, not an index) stays a list and does not
            # raise on short lines.
            tempout.write('\t'.join(fields[2:3] + fields[9:]) + '\n')
def changesnp():
    """Read ``tempfile.txt``, transpose it and recode genotype strings.

    The tab-separated file is loaded without a header row and transposed.
    Cells equal to ``'0/0'`` are replaced by 0, and cells equal to ``'0/1'``,
    ``'1/0'`` or ``'1/1'`` are replaced by 1. Any other cell value is left
    untouched.

    Returns:
        The transposed, recoded table as a NumPy array.
    """
    table = pd.read_csv("tempfile.txt", sep='\t', header=None)
    matrix = np.array(table.T)
    # Genotype string -> numeric code, applied in the same order as before.
    recode = (('0/0', 0), ('0/1', 1), ('1/0', 1), ('1/1', 1))
    for genotype, code in recode:
        matrix[matrix == genotype] = code
    return matrix
@click.command()
@click.option('--vcf', type=click.File('r'), help='VCF file to heatmap', required=True)
@click.option('--out', type=str, help='out file perfix', required=True)
def main(vcf, out):
    """
    Convert an extracted VCF file directly into the input file for the R
    script that draws the haplotype plot. The result is written to
    <out>.txt as a tab-separated table without header or index.
    """
    # NOTE: click displays the docstring above as the command's --help text.
    try:
        load_vcf(vcf)
        file_arr = changesnp()
    finally:
        # Always drop the intermediate file, even if parsing failed.
        # os.remove is portable and avoids spawning a shell for `rm`.
        if os.path.exists("tempfile.txt"):
            os.remove("tempfile.txt")
    file_pd = pd.DataFrame(file_arr)
    file_pd.to_csv(f'{out}.txt', sep='\t', header=False, index=False)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()