-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta_to_json.py
31 lines (25 loc) · 1.12 KB
/
fasta_to_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from ribopy import Ribo
import ribopy
from Fasta import FastaFile
import json
from argparse import ArgumentParser
from utils import get_cds_range_lookup
# Default ribo input: one experiment of one study, addressed by an absolute
# path. Pass --path_to_ribo to point the script at a different file.
study = 'GSE51584'
experiment = 'GSM1248729'
default_ribo_path = f"/scratch/users/mjgeng/process-multiple-ribo/output/{study}/ribo/experiments/{experiment}.ribo"

# Command-line interface: both options are optional and fall back to the
# defaults given here.
parser = ArgumentParser()
parser.add_argument(
    '--path_to_fasta',
    dest='path_to_fasta',
    default="data/appris_human_v2_selected.fa.gz",
)
parser.add_argument(
    '--path_to_ribo',
    dest='path_to_ribo',
    default=default_ribo_path,
)
args = parser.parse_args()

# Open the ribo file, handing it ribopy's apris_human_alias as the alias
# function.
ribo = Ribo(args.path_to_ribo, alias=ribopy.api.alias.apris_human_alias)

# Read every FASTA record into memory, keyed by its header.
fasta = FastaFile(args.path_to_fasta)
fasta_dict = {record.header: record.sequence for record in fasta}
# Build {aliased transcript name: sequence} for every transcript listed in the
# ribo file. The FASTA lookup uses the name exactly as ribo.transcript_names
# yields it, so a transcript missing from the FASTA file raises KeyError; only
# the output key is passed through apris_human_alias.
to_alias = ribopy.api.alias.apris_human_alias
sequence_dict = {
    to_alias(full_name): fasta_dict[full_name]
    for full_name in ribo.transcript_names
}

# Persist the mapping as JSON (any existing file is truncated).
with open("data/sequence_dict.json", 'w+') as f:
    json.dump(sequence_dict, f)
# For each transcript keep only the first two items of the entry at index 1 of
# its boundary record, converted to built-in ints before dumping.
# NOTE(review): index 1 is presumably the CDS (start, stop) pair, and the
# int() casts presumably exist because the raw values are not JSON
# serializable -- confirm against utils.get_cds_range_lookup.
boundary_lookup = get_cds_range_lookup(ribo)
cds_ranges = {
    name: [int(regions[1][0]), int(regions[1][1])]
    for name, regions in boundary_lookup.items()
}

# Persist the ranges as JSON (any existing file is truncated).
with open("data/cds_ranges.json", 'w+') as f:
    json.dump(cds_ranges, f)