Skip to content

Commit 8e89290

Browse files
author
Bill Majoros
committed
update
1 parent 322416a commit 8e89290

File tree

2 files changed

+39
-5
lines changed

2 files changed

+39
-5
lines changed

Gene.py

+22
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
# strand=gene.getStrand()
3131
# substrate=gene.getSubstrate()
3232
# gff=gene.toGff()
33+
# exons=gene.getMergedExons()
3334
#
3435
######################################################################
3536

@@ -38,6 +39,27 @@ def __init__(self):
3839
self.transcripts=[]
3940
self.transcriptHash={}
4041

42+
def getMergedExons(self):
    """Return the gene's exons with overlapping intervals merged.

    Raw exons are gathered from every transcript in self.transcripts
    (via transcript.getRawExons()), sorted by their `begin` coordinate,
    and then any exon that overlaps the interval currently being built
    (as decided by the exon's own overlaps() method) is folded into it
    by extending that interval's `end`.

    Returns:
        A new list of exon objects ordered by `begin`.  The returned
        objects are shallow copies, so the exons handed back by
        getRawExons() are never modified by the merge.
    """
    # Function-scope import keeps this method self-contained.
    import copy

    exons = []
    for transcript in self.transcripts:
        exons.extend(transcript.getRawExons())
    exons.sort(key=lambda x: x.begin)

    merged = []
    for exon in exons:
        if merged and merged[-1].overlaps(exon):
            # Grow the interval under construction; it is already a copy,
            # so writing to it cannot alter any transcript's exon.
            merged[-1].end = max(merged[-1].end, exon.end)
        else:
            # Start a new interval from a copy rather than the original
            # object, which may still be referenced by its transcript.
            merged.append(copy.copy(exon))
    return merged
62+
4163
def getStrand(self):
4264
transcripts=self.transcripts
4365
transcript=transcripts[0]

test-transcript-reader.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,21 @@
1515
#filename="/home/bmajoros/1000G/assembly/local-genes.gff"
1616
#filename="/home/bmajoros/1000G/assembly/tmp.gff"
1717
#filename="test/data/tmp.gff"
18-
filename="test/data/local-genes.gff"
18+
#filename="test/data/local-genes.gff"
19+
filename="/home/bmajoros/ensembl/protein-coding.gff"
1920

2021
# Load every gene from the GFF file and report, per gene, how many raw
# exons (summed over its transcripts) collapse into how many merged exons.
gffReader = GffTranscriptReader()
for gene in gffReader.loadGenes(filename):
    # Merge first, then count the raw exons, matching the original order.
    mergedExons = gene.getMergedExons()
    rawCount = sum(len(transcript.getRawExons())
                   for transcript in gene.transcripts)
    print(rawCount, "exons merged to", len(mergedExons))
29+
#for i in range(len(exons)):
30+
# print("MERGED TO:",exons[i].begin,exons[i].end)
31+
# print()
32+
2133
#transcripts=reader.loadGFF(filename)
2234
#for transcript in transcripts:
2335
#print(transcript.getID())
@@ -49,8 +61,8 @@
4961
#for key in keys:
5062
# print(key)
5163

52-
hashTable=reader.loadGeneIdHash(filename)
53-
keys=hashTable.keys()
54-
for key in keys:
55-
print(key)
64+
#hashTable=reader.loadGeneIdHash(filename)
65+
#keys=hashTable.keys()
66+
#for key in keys:
67+
# print(key)
5668

0 commit comments

Comments
 (0)