Skip to content

Commit f5a433f

Browse files
author
Bill Majoros
committed Mar 24, 2017
update
1 parent 3d90f8f commit f5a433f

File tree

4 files changed

+29
-18
lines changed

4 files changed

+29
-18
lines changed
 

‎EssexNode.py

+14 -1
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
# elements : array
2424
# Methods:
2525
# node=EssexNode([tag,elem1,elem2,...])
26+
# node.addElem(elem)
2627
# tag=node.getTag()
2728
# node.changeTag(newTag)
2829
# n=node.numElements()
@@ -37,6 +38,7 @@
3738
# n=node.countDescendents(tag)
3839
# bool=node.hasDescendent(tag)
3940
# string=node.getAttribute(attributeTag)
41+
# node.setAttribute(tag,value)
4042
# array=node.getElements()
4143
# bool=EssexNode.isaNode(datum)
4244
# bool=node.hasCompositeChildren()
@@ -67,6 +69,17 @@ def __init__(self,parms):
6769
self.tag=""
6870
self.elements=[]
6971

72+
def addElem(self,elem):
73+
self.elements.append(elem)
74+
75+
def setAttribute(self,tag,value):
76+
elements=self.elements
77+
for elem in elements:
78+
if(EssexNode.isaNode(elem) and elem.getTag()==tag):
79+
elem.setIthElem(0,value)
80+
return
81+
elements.append(EssexNode([tag,value]))
82+
7083
def getTag(self):
7184
return self.tag
7285

@@ -227,7 +240,7 @@ def printRecursive(self,depth,file):
227240
else:
228241
for i in range(n):
229242
elem=elements[i]
230-
file.write(" "+elem)
243+
file.write(" "+str(elem))
231244
file.write(")")
232245

233246
def printExonXML(self,tag,tab,depth,file):

‎GffTranscriptReader.py

+5 -3
Original file line number | Diff line number | Diff line change
@@ -179,9 +179,11 @@ def adjustStartCodons(self,transcripts):
179179
startCodon=None
180180
totalIntronSize=Integer(0)
181181
if(strand=="+"):
182-
startCodon=self.adjustStartCodons_fw(transcript,totalIntronSize)
182+
startCodon=\
183+
self.adjustStartCodons_fw(transcript,totalIntronSize)
183184
else:
184-
startCodon=self.adjustStartCodons_bw(transcript,totalIntronSize)
185+
startCodon=\
186+
self.adjustStartCodons_bw(transcript,totalIntronSize)
185187
if(startCodon is not None):
186188
startCodon-=int(totalIntronSize)
187189
transcript.startCodon=startCodon
@@ -391,7 +393,7 @@ def loadGFF(self,gffFilename):
391393
if(not line): break
392394
if(not re.search("\S+",line)): continue
393395
if(re.search("^\s*\#",line)): continue
394-
fields=line.split()
396+
fields=line.split("\t") ### \t added 3/24/2017
395397
if(len(fields)<8): raise Exception("can't parse GTF:"+line)
396398
if(fields[2]=="transcript"):
397399
#print("loading transcript line")

‎Rex.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,10 @@
1313
# Attributes:
1414
# match : returned from re.search()
1515
# Instance Methods:
16-
# r=Rex()
17-
# bool=r.find("abc(\d+)def(\d+)ghi(\d+)",line)
18-
# x=r[1]; y=r[2]; z=r[3]
16+
# rex=Rex()
17+
# bool=rex.find("abc(\d+)def(\d+)ghi(\d+)",line)
18+
# rex.findOrDie("abc(\d+)def(\d+)ghi(\d+)",line)
19+
# x=rex[1]; y=rex[2]; z=rex[3]
1920
#=========================================================================
2021
class Rex:
2122
"""Rex -- more compact regular expression matching similar to Perl"""
@@ -27,6 +28,9 @@ def find(self,pattern,line):
2728
self.match=re.search(pattern,line)
2829
return self.match is not None
2930

31+
def findOrDie(self,pattern,line):
32+
if(not self.find(pattern,line)): raise Exception("can't parse: "+line)
33+
3034
def __getitem__(self,index):
3135
return self.match.group(index)
3236

‎Transcript.py

+3 -11
Original file line number | Diff line number | Diff line change
@@ -527,7 +527,7 @@ def trimUTR(self,axisSequenceRef):
527527
numExons-=1
528528
j-=1
529529
startCodon-=length
530-
self.adjustOrders() ### 4/1/03
530+
self.adjustOrders()
531531
else:
532532
if(strand=="+"):
533533
exon.trimInitialPortion(startCodon)
@@ -692,38 +692,30 @@ def setStrand(self,strand):
692692
for exon in exons: exon.setStrand(strand)
693693

694694
def getRawExons(self):
695-
#print("getRawExons",self.getID())
696695
rawExons=self.rawExons
697696
if(not rawExons or len(rawExons)==0):
698697
exons=self.exons
699698
UTR=self.UTR
700699
rawExons=[]
701700
for exon in exons:
702-
#print("exon",exon.getLength())
703701
if(exon.getLength()>0): rawExons.append(exon.copy())
704702
for utr in UTR:
705-
#print("UTR",utr.getLength())
706703
if(utr.getLength()>0): rawExons.append(utr.copy())
707704
# Sort into chromosome order (temporarily):
708705
rawExons.sort(key=lambda exon: exon.begin)
709706
# Now coalesce any UTR-exon pairs that are adjacent:
710707
n=len(rawExons)
711-
#print(n,"exons")
712708
i=0
713709
while(i<n):
714710
exon=rawExons[i]
715-
#print("exon",i,exon.getBegin(),exon.getEnd())
716711
exon.setType("exon")
717712
if(i+1<n):
718713
nextExon=rawExons[i+1]
719-
#print("next exon",i+1,nextExon.getBegin(),nextExon.getEnd())
720714
if(exon.getEnd()==nextExon.getBegin() or
721715
exon.getEnd()==nextExon.getBegin()-1):
722716
exon.setEnd(nextExon.getEnd())
723717
nextExon=None
724-
#print("deleting exon",i+1,"count="+str(len(rawExons)))
725718
rawExons.pop(i+1)
726-
#print("now count="+str(len(rawExons)))
727719
n-=1
728720
i-=1
729721
i+=1
@@ -742,10 +734,10 @@ def parseExtraFields(self):
742734
if(string is None): return pairs
743735
fields=string.split(";")
744736
for field in fields:
745-
match=re.search("(\S+)[\s=]+(\S+)",field)
737+
match=re.search("(\S+)[\s=]+([^;]+)",field)
746738
if(not match): continue
747739
(key,value)=(match.group(1),match.group(2))
748-
match=re.search("\"(\S+)\"",value)
740+
match=re.search("\"(.+)\"",value)
749741
if(match): value=match.group(1)
750742
pairs.append([key,value])
751743
return pairs

0 commit comments

Comments
 (0)
Please sign in to comment.