-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathucsc_prep.py
executable file
·76 lines (60 loc) · 3.04 KB
/
ucsc_prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import random
import subprocess
import sys

import pandas as pd
def generate_rand_colors(how_many):
    """Return a list of ``how_many`` random colour strings.

    Each entry is formatted as ``'R,G,B'``. Every channel is drawn
    independently with ``random.randint(0, 129)``, so each value lies in
    the inclusive range 0..129.

    Args:
        how_many: Number of colour strings to generate. Zero or a
            negative number yields an empty list.

    Returns:
        A list of ``how_many`` strings, one per colour.
    """
    # Channels are drawn in R, G, B order for each colour in turn, so a
    # seeded ``random`` produces a reproducible palette.
    return [
        ','.join(str(random.randint(0, 129)) for _ in range(3))
        for _ in range(how_many)
    ]
# Interactive driver: for every BAM listed in ucsc_config.txt, extract the
# requested region with samtools, compute scaled coverage with bedtools, and
# concatenate the results (each preceded by a UCSC track header) into
# "<gene>.bedgraph".
location = input("Please input chromosomal location: ")
filename = input('Which gene are you looking at? ')
if location == '' or filename == '':
    print('Please input valid location or gene name')
    # Exit with a non-zero status so callers can tell the run was aborted.
    sys.exit(1)

bam_df = pd.read_csv('ucsc_config.txt', sep=',', header='infer')
bam_df['sample_name'] = bam_df['sample_name'].astype(str)

# Generating the tracknames
bam_df['track_name'] = bam_df['sample_name']

# In the event that a bam file in a different directory is inputted:
bam_df['clean_bam_name'] = bam_df.bam_name.str.split('/').str[-1]

# Checking if there's color, and adding some if there isn't.
# A column consisting only of 0 is treated as "no colors supplied".
if bam_df['color'].eq(0).all():
    bam_df['color'] = generate_rand_colors(bam_df.shape[0])
else:
    # regex=False: '.' must be replaced literally. Interpreted as a regular
    # expression it would match (and replace) every character.
    bam_df['color'] = bam_df['color'].str.replace('.', ',', regex=False)

# The shell used to split the location on whitespace, so several
# space-separated regions were passed to samtools as separate arguments.
# Keep that behavior now that no shell is involved.
regions = location.split()
combined_path = '%s.bedgraph' % filename

# Going through each file
is_first_track = True
for _, row in bam_df.iterrows():
    region_bam_path = '%s_temp_for_prog%s' % (filename, row['clean_bam_name'])
    try:
        # Generating the smaller bam file
        with open(region_bam_path, 'wb') as region_bam:
            subprocess.run(
                ['samtools', 'view', '-bh', str(row['bam_name'])] + regions,
                stdout=region_bam,
                check=True,
            )

        # Header that precedes this track's data in the final product
        track_header = (
            'track type="bedGraph" name="%s" color=%s '
            'visibility=full autoScale=off maxHeightPixels=100:30:11 '
            'viewLimits=0.0:1.5\nbrowser position %s\n'
            % (row['track_name'], row['color'], location)
        )

        # The first track truncates any previous output; later tracks append.
        with open(combined_path, 'wb' if is_first_track else 'ab') as combined:
            combined.write(track_header.encode())
            # Flush before handing the descriptor to bedtools so the header
            # is guaranteed to land ahead of the coverage lines.
            combined.flush()
            # Generating the bedgraph directly into the combined file
            subprocess.run(
                ['bedtools', 'genomecov', '-split',
                 '-ibam', region_bam_path,
                 '-bg', '-scale', str(row['normalized_read_depth'])],
                stdout=combined,
                check=True,
            )
        is_first_track = False
    finally:
        # Removing the temporary bam to reduce clutter, even on failure.
        if os.path.exists(region_bam_path):
            os.remove(region_bam_path)