Skip to content

Commit 7fbc5f0

Browse files
author
Bill Majoros
committed
update
1 parent f5a433f commit 7fbc5f0

9 files changed

+68
-25
lines changed

Codon.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
class Codon:
3737
def __init__(self,exon,triplet,relative,absolute,isInterrupted):
38-
if(not relative): raise Exception("relative is not set")
38+
if(relative is None): raise Exception("relative is not set")
3939
self.triplet=triplet
4040
self.exon=exon
4141
self.relativeCoord=relative

CodonIterator.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@
3434
class CodonIterator:
3535
def __init__(self,transcript,axisSequenceRef,stopCodons):
3636
self.transcript=transcript
37-
self.stopCodon=selCodons
37+
self.stopCodons=stopCodons
3838

3939
# Advance to the exon containing the start codon
4040
exons=transcript.exons
4141
strand=transcript.strand
4242
startCodon=transcript.startCodonAbsolute
4343
if(startCodon):
4444
if(len(axisSequenceRef)==0): raise Exception("empty sequence")
45-
transcript.loadExonSequences(axisSequenceRef)
45+
transcript.loadExonSequences(axisSequenceRef,transcript.exons)
4646
numExons=len(exons)
4747
exon=None
4848
for i in range(0,numExons):
@@ -120,7 +120,7 @@ def nextCodon(self):
120120
self.exon=nextExon
121121
else: # codon was not interrupted by end of exon
122122
triplet=exonSeq[relative:relative+3]
123-
if(isStopCodon[triplet]): self.exon=None
123+
if(isStopCodon.get(triplet,False)): self.exon=None
124124
else:
125125
self.relative+=3
126126
self.absolute+=3 if strand=="+" else -3

Exon.py

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
unicode_literals, generators, nested_scopes, with_statement)
88
from builtins import (bytes, dict, int, list, object, range, str, ascii,
99
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
10+
from Interval import Interval
1011

1112
######################################################################
1213
#
@@ -53,6 +54,7 @@
5354
# gff=exon.toGff()
5455
# begin=exon.getBegin()
5556
# end=exon.getEnd()
57+
# interval=exon.asInterval()
5658
# frame=exon.getFrame()
5759
# exon.setFrame(frame)
5860
# type=exon.getType()
@@ -87,6 +89,9 @@ def setEnd(self,end):
8789
def containsCoordinate(self,x):
8890
return x>=self.begin and x<self.end
8991

92+
def asInterval(self):
93+
return Interval(self.begin,self.end)
94+
9095
def getLength(self):
9196
return self.end-self.begin
9297

FastaReader.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# reader.doUppercase()
2424
# Class Methods:
2525
# size=FastaReader.getSize(filename)
26+
# num=FastaReader.countEntries(filename)
2627
# FastaReader.readAll(filename) # returns hash : id->sequence
2728
# FastaReader.readAllAndKeepDefs(filename) # returns hash : id->[def,seq]
2829
# (defline,seq)=FastaReader.firstSequence(filename)
@@ -94,13 +95,23 @@ def firstSequence(cls,filename):
9495
reader.close()
9596
return [defline,seq]
9697

98+
@classmethod
99+
def countEntries(cls,filename):
100+
n=0
101+
reader=FastaReader(filename)
102+
while(True):
103+
(defline,seq)=reader.nextSequence()
104+
if(not defline): break
105+
n+=1
106+
return n
107+
97108
@classmethod
98109
def readAll(cls,filename):
99110
hash={}
100111
reader=FastaReader(filename)
101112
while(True):
102113
[defline,seq]=reader.nextSequence()
103-
if(not defline): break;
114+
if(not defline): break
104115
match=re.search("^\s*>(\S+)",defline)
105116
if(not match): raise Exception("can't parse defline: "+defline)
106117
id=match.group(1)
@@ -114,7 +125,7 @@ def readAllAndKeepDefs(cls,filename):
114125
reader=FastaReader(filename)
115126
while(True):
116127
[defline,seq]=reader.nextSequence()
117-
if(not defline): break;
128+
if(not defline): break
118129
match=re.search("^\s*>(\S+)",defline)
119130
if(not match): raise Exception("can't parse defline: "+defline)
120131
id=match.group(1)

GFF3Parser.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def labelStructure(self,root):
150150
t=root["type"]
151151
if(t=="gene"):
152152
obj=self.makeGene(root)
153-
elif(t=="transcript"):
153+
elif(t=="transcript" or t=="mRNA"):
154154
obj=self.makeTranscript(root)
155155
elif(t=="exon" or t=="CDS"):
156156
obj=self.makeExon(root)
@@ -335,4 +335,4 @@ def test_parser7(filename):
335335
for gene in genes:
336336
print(gene.toGff())
337337

338-
test_parser7("/Users/bmajoros/python/test/data/subset.gff3")
338+
#test_parser7("/Users/bmajoros/python/test/data/subset.gff3")

GffTranscriptReader.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ def loadGFF_transcript(self,fields,line,transcriptBeginEnd,GFF,
197197
transcriptId=rex[1]
198198
transcriptBeginEnd[transcriptId]=[begin,end]
199199
strand=fields[6]
200+
score=fields[5]
200201
transcriptExtraFields=""
201202
for i in range(8,len(fields)):
202203
transcriptExtraFields+=fields[i]+" "
@@ -211,6 +212,8 @@ def loadGFF_transcript(self,fields,line,transcriptBeginEnd,GFF,
211212
transcript.source=fields[1]
212213
transcript.setBegin(begin)
213214
transcript.setEnd(end)
215+
if(transcript.score is None and
216+
score!="."): transcript.score=float(score)
214217
geneId=None
215218
if(rex.find("genegrp=(\S+)",line)): geneId=rex[1]
216219
elif(rex.find('gene_id[:=]?\s*\"?([^\s\;"]+)\"?',line)):
@@ -389,26 +392,21 @@ def loadGFF(self,gffFilename):
389392
transcriptBeginEnd={}
390393
while(True):
391394
line=GFF.readline()
392-
#print("LINE="+line)
393395
if(not line): break
394396
if(not re.search("\S+",line)): continue
395397
if(re.search("^\s*\#",line)): continue
396398
fields=line.split("\t") ### \t added 3/24/2017
397399
if(len(fields)<8): raise Exception("can't parse GTF:"+line)
398400
if(fields[2]=="transcript"):
399-
#print("loading transcript line")
400401
self.loadGFF_transcript(fields,line,transcriptBeginEnd,GFF,
401402
transcripts,readOrder,genes)
402403
elif("UTR" in fields[2] or "utr" in fields[2]):
403-
#print("loading UTR")
404404
self.loadGFF_UTR(fields,line,transcriptBeginEnd,GFF,
405405
transcripts,readOrder,genes)
406406
elif(fields[2]=="exon"):
407-
#print("loading exon: "+line)
408407
self.loadGFF_exon(fields,line,transcriptBeginEnd,GFF,
409408
transcripts,readOrder,genes)
410409
elif("CDS" in fields[2] or "-exon" in fields[2]):
411-
#print("loading CDS")
412410
self.loadGFF_CDS(fields,line,transcriptBeginEnd,GFF,
413411
transcripts,readOrder,genes)
414412
GFF.close()

Interval.py

+8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
# union=interval.union(other) # returns an array of intervals
2525
# diff=interval.minus(other) # returns an array of intervals
2626
# length=interval.getLength()
27+
# begin=interval.getBegin()
28+
# end=interval.getEnd()
2729
# length=interval.length()
2830
# bool=interval.equals($other)
2931
# other=interval.clone()
@@ -82,6 +84,12 @@ def length(self):
8284
def getLength(self):
8385
return self.length()
8486

87+
def getBegin(self):
88+
return self.begin
89+
90+
def getEnd(self):
91+
return self.end
92+
8593
def equals(self,other):
8694
return self.begin==other.begin and self.end==other.end
8795

SlurmWriter.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@
2323
# writer.mem(1500)
2424
# writer.threads(16)
2525
# writer.setQueue("new,all")
26-
# writer.writeArrayScript(slurmDir,jobName,runDir,maxParallel,
26+
# writer.writeArrayScript(slurmDir,jobName,maxParallel,
2727
# additional_SBATCH_lines)
2828
#=========================================================================
2929
class SlurmWriter:
3030
"""SlurmWriter"""
3131
def __init__(self):
3232
self.commands=[]
3333
self.niceValue=0
34-
self.MemValue=None
34+
self.memValue=0
3535
self.threadsValue=0
3636
self.queue=None
3737

@@ -50,8 +50,7 @@ def threads(self,value):
5050
def setQueue(self,value):
5151
self.queue=value
5252

53-
def writeArrayScript(self,slurmDir,jobName,runDir,maxParallel,
54-
moreSBATCH=""):
53+
def writeArrayScript(self,slurmDir,jobName,maxParallel,moreSBATCH=""):
5554
if(moreSBATCH is None): moreSBATCH=""
5655
if(maxParallel<1): raise Exception("specify maxParallel parameter")
5756
moreSBATCH=moreSBATCH.rstrip()

Transcript.py

+30-8
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
# regardless of strand
2626
# startCodonAbsolute : absolute coordinates of start codon,
2727
# relative to genomic axis
28+
# score : float
2829
# strand : + or -
2930
# exons : pointer to array of Exons (which are actually CDS segments)
3031
# UTR : pointer to array of UTR segments
@@ -124,13 +125,16 @@ class Transcript:
124125
def __init__(self,id,strand=None):
125126
if(type(id)!=EssexNode): # not an EssexNode
126127
self.transcriptId=id
128+
self.score=None
127129
self.strand=strand
128130
self.exons=[]
129131
self.UTR=[]
130132
self.rawExons=None
131133
self.stopCodons={"TAG":1,"TGA":1,"TAA":1}
132134
self.startCodon=None
133135
self.extraFields=None
136+
self.structureChange=None
137+
self.fate=None
134138
else: # EssexNode
135139
essex=id
136140
self.transcriptId=essex.getAttribute("ID")
@@ -140,12 +144,24 @@ def __init__(self,id,strand=None):
140144
self.end=essex.getAttribute("end")
141145
self.geneId=essex.getAttribute("gene")
142146
self.substrate=essex.getAttribute("substrate")
147+
score=essex.getAttribute("score")
148+
self.score=float(score) if score!="." else None
143149
self.exons=[]
144150
self.UTR=[]
145151
self.rawExons=None
146152
self.startCodon=None
147153
self.extraFields=None
148154
self.stopCodons={"TAG":1,"TGA":1,"TAA":1}
155+
changeNode=essex.findChild("structure-change")
156+
if(changeNode is not None):
157+
changeString=""
158+
numElem=changeNode.numElements()
159+
for i in range(0,numElem):
160+
elem=changeNode.getIthElem(i)
161+
if(EssexNode.isaNode(elem)): continue
162+
if(changeString!=""): changeString+=" "
163+
changeString+=elem
164+
if(len(changeString)>0): self.structureChange=changeString
149165
exons=self.exons
150166
UTR=self.UTR
151167
exonsElem=essex.findChild("exons")
@@ -442,9 +458,14 @@ def toGff(self):
442458
strand=self.strand
443459
extra=""
444460
if(re.search("\S",extraFields)): extra="; "+extraFields
461+
change=self.structureChange
462+
if(change): extra+="; structure_change \""+change+"\""
463+
score=self.score
464+
if(score is None): score="."
445465
gff+=substrate+"\t"+source+"\ttranscript\t"+str(begin)+"\t" \
446-
+str(end)+"\t.\t"+strand+"\t.\ttranscript_id \""+transID+ \
447-
"\"; gene_id \""+geneID+"\""+extra+"\n"
466+
+str(end)+"\t"+str(score)+"\t"+strand+ \
467+
"\t.\ttranscript_id \""+ \
468+
transID+ "\"; gene_id \""+geneID+"\""+extra+"\n"
448469
for exon in exons: gff+=exon.toGff()
449470
UTR=self.UTR
450471
for exon in UTR:
@@ -596,12 +617,13 @@ def trimUTR(self,axisSequenceRef):
596617
self.recomputeBoundaries()
597618

598619
def getScore(self):
599-
exons=self.exons
600-
score=0;
601-
for exon in exons:
602-
exonScore=exon.getScore()
603-
if(exonScore!="."): score+=exonScore
604-
return score
620+
return self.score
621+
#exons=self.exons
622+
#score=0;
623+
#for exon in exons:
624+
# exonScore=exon.getScore()
625+
# if(exonScore!="."): score+=exonScore
626+
#return score
605627

606628
def getIntrons(self):
607629
exons=self.getRawExons()

0 commit comments

Comments
 (0)