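update: add EssexNode.addElem/setAttribute and Rex.findOrDie; split GFF lines on tabs; allow spaces in extra-field values; remove debug prints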

Bill Majoros · Bill Majoros · commit f5a433fbc3d1 · 2017-03-24T13:25:14.000-04:00
diff --git a/EssexNode.py b/EssexNode.py
@@ -23,6 +23,7 @@
 #   elements : array
 # Methods:
 #   node=EssexNode([tag,elem1,elem2,...])
+#   node.addElem(elem)
 #   tag=node.getTag()
 #   node.changeTag(newTag)
 #   n=node.numElements()
@@ -37,6 +38,7 @@
 #   n=node.countDescendents(tag)
 #   bool=node.hasDescendent(tag)
 #   string=node.getAttribute(attributeTag)
+#   node.setAttribute(tag,value)
 #   array=node.getElements()
 #   bool=EssexNode.isaNode(datum)
 #   bool=node.hasCompositeChildren()
@@ -67,6 +69,17 @@ def __init__(self,parms):
             self.tag=""
             self.elements=[]
 
+    def addElem(self,elem):  # append elem to the end of this node's elements list
+        self.elements.append(elem)
+
+    def setAttribute(self,tag,value):  # set value on the first child node tagged `tag`, or append a new (tag value) child
+        elements=self.elements
+        for elem in elements:
+            if(EssexNode.isaNode(elem) and elem.getTag()==tag):  # skip non-node elements and nodes with other tags
+                elem.setIthElem(0,value)  # presumably overwrites element 0 of that child -- confirm against setIthElem
+                return  # only the first matching child is updated
+        elements.append(EssexNode([tag,value]))  # no child with this tag: add a new node built from [tag,value]
+
     def getTag(self):
         return self.tag
 
@@ -227,7 +240,7 @@ def printRecursive(self,depth,file):
         else:
              for i in range(n):
                  elem=elements[i]
-                 file.write(" "+elem)
+                 file.write(" "+str(elem))
         file.write(")")
 
     def printExonXML(self,tag,tab,depth,file):
diff --git a/GffTranscriptReader.py b/GffTranscriptReader.py
@@ -179,9 +179,11 @@ def adjustStartCodons(self,transcripts):
             startCodon=None
             totalIntronSize=Integer(0)
             if(strand=="+"):
-                startCodon=self.adjustStartCodons_fw(transcript,totalIntronSize)
+                startCodon=\
+                    self.adjustStartCodons_fw(transcript,totalIntronSize)
             else:
-                startCodon=self.adjustStartCodons_bw(transcript,totalIntronSize)
+                startCodon=\
+                    self.adjustStartCodons_bw(transcript,totalIntronSize)
             if(startCodon is not None):
                 startCodon-=int(totalIntronSize)
                 transcript.startCodon=startCodon
@@ -391,7 +393,7 @@ def loadGFF(self,gffFilename):
             if(not line): break
             if(not re.search("\S+",line)): continue
             if(re.search("^\s*\#",line)): continue
-            fields=line.split()
+            fields=line.split("\t") ### \t added 3/24/2017
             if(len(fields)<8): raise Exception("can't parse GTF:"+line)
             if(fields[2]=="transcript"):
                 #print("loading transcript line")
diff --git a/Rex.py b/Rex.py
@@ -13,9 +13,10 @@
 # Attributes:
 #   match : returned from re.search()
 # Instance Methods:
-#   r=Rex()
-#   bool=r.find("abc(\d+)def(\d+)ghi(\d+)",line)
-#   x=r[1]; y=r[2]; z=r[3]
+#   rex=Rex()
+#   bool=rex.find("abc(\d+)def(\d+)ghi(\d+)",line)
+#   rex.findOrDie("abc(\d+)def(\d+)ghi(\d+)",line)
+#   x=rex[1]; y=rex[2]; z=rex[3]
 #=========================================================================
 class Rex:
     """Rex -- more compact regular expression matching similar to Perl"""
@@ -27,6 +28,9 @@ def find(self,pattern,line):
         self.match=re.search(pattern,line)
         return self.match is not None
 
+    def findOrDie(self,pattern,line):  # like find(), but raises Exception when pattern does not match line; returns nothing
+        if(not self.find(pattern,line)): raise Exception("can't parse: "+line)
+
     def __getitem__(self,index):
         return self.match.group(index)
 
diff --git a/Transcript.py b/Transcript.py
@@ -527,7 +527,7 @@ def trimUTR(self,axisSequenceRef):
                 numExons-=1
                 j-=1
                 startCodon-=length
-                self.adjustOrders() ### 4/1/03
+                self.adjustOrders()
             else:
                 if(strand=="+"):
                     exon.trimInitialPortion(startCodon)
@@ -692,38 +692,30 @@ def setStrand(self,strand):
         for exon in exons: exon.setStrand(strand)
 
     def getRawExons(self):
-        #print("getRawExons",self.getID())
         rawExons=self.rawExons
         if(not rawExons or len(rawExons)==0):
             exons=self.exons
             UTR=self.UTR
             rawExons=[]
             for exon in exons: 
-                #print("exon",exon.getLength())
                 if(exon.getLength()>0): rawExons.append(exon.copy())
             for utr in UTR: 
-                #print("UTR",utr.getLength())
                 if(utr.getLength()>0): rawExons.append(utr.copy())
         # Sort into chromosome order (temporarily):
         rawExons.sort(key=lambda exon: exon.begin)
         # Now coalesce any UTR-exon pairs that are adjacent:
         n=len(rawExons)
-        #print(n,"exons")
         i=0
         while(i<n):
             exon=rawExons[i]
-            #print("exon",i,exon.getBegin(),exon.getEnd())
             exon.setType("exon")
             if(i+1<n):
                 nextExon=rawExons[i+1]
-                #print("next exon",i+1,nextExon.getBegin(),nextExon.getEnd())
                 if(exon.getEnd()==nextExon.getBegin() or
                    exon.getEnd()==nextExon.getBegin()-1):
                     exon.setEnd(nextExon.getEnd())
                     nextExon=None
-                    #print("deleting exon",i+1,"count="+str(len(rawExons)))
                     rawExons.pop(i+1)
-                    #print("now count="+str(len(rawExons)))
                     n-=1
                     i-=1
             i+=1
@@ -742,10 +734,10 @@ def parseExtraFields(self):
         if(string is None): return pairs
         fields=string.split(";")
         for field in fields:
-            match=re.search("(\S+)[\s=]+(\S+)",field)
+            match=re.search("(\S+)[\s=]+([^;]+)",field)
             if(not match): continue
             (key,value)=(match.group(1),match.group(2))
-            match=re.search("\"(\S+)\"",value)
+            match=re.search("\"(.+)\"",value)
             if(match): value=match.group(1)
             pairs.append([key,value])
         return pairs