Skip to content

Commit ba353e9

Browse files
author
Bill Majoros
committed
update
1 parent 5257d7b commit ba353e9

6 files changed

+68
-7
lines changed

CigarOp.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
1010
from Interval import Interval
1111

12-
ADVANCE_QUERY=set(["M","I","S","=","X"])
12+
ADVANCE_QUERY=set(["M","I","S","H","=","X"])
1313
ADVANCE_REF=set(["M","D","N","=","X"])
1414

1515
#=========================================================================

DataFrame.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99
chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
1010
import sys
1111
from DataFrameRow import DataFrameRow
12+
from Rex import Rex
13+
rex=Rex()
1214

1315
#=========================================================================
1416
# Attributes:
1517
# header
16-
# matrix : array of rows, each of which is an array of data values
18+
# matrix : array of rows, each of which is a DataFrameRow
1719
# rowHash : dictionary mapping row names to row indices
1820
# colHash : dictionary mapping column names to column indices
1921
# Methods:
@@ -31,6 +33,7 @@
3133
# df.toFloat()
3234
# df.colToFloat(colIndex)
3335
# header=df.getHeader()
36+
# df.removeQuotes()
3437
# df.hashRowNames()
3538
# df.hashColNames()
3639
# row=df.getRowI(i)
@@ -70,6 +73,20 @@ def addRow(self,row):
7073
def getRows(self):
7174
return self.matrix
7275

76+
def removeQuotes(self):
    """Strip double quotes from the cells of every row, then from the header.

    For each cell, the module-level `rex` matcher is asked to find a double
    quote, optional whitespace, a run of non-whitespace characters and a
    closing double quote.  When it reports a match, the cell is overwritten
    with capture group 1 (the non-whitespace run).  Cells that do not match
    are left untouched.  Finishes by calling self.unquoteHeader().
    """
    # Raw string: the pattern text is exactly what the old literal produced,
    # but "\s" / "\S" are no longer invalid escapes in an ordinary string.
    pattern=r'"\s*(\S+)"'
    for row in self.matrix:
        raw=row.getRaw()
        # Assigning to raw[i] edits the list handed back by getRaw().
        for i,value in enumerate(raw):
            if rex.find(pattern,value):
                raw[i]=rex[1]
    self.unquoteHeader()
83+
84+
def unquoteHeader(self):
    """Strip double quotes from the entries of self.header, in place.

    An entry is rewritten only when the module-level `rex` matcher finds a
    double quote, optional whitespace, a run of non-whitespace characters
    and a closing double quote in it; the entry then becomes capture
    group 1 (the non-whitespace run).
    """
    # Raw string: same pattern text as before, without relying on "\s" and
    # "\S" surviving as invalid escapes in an ordinary string literal.
    pattern=r'"\s*(\S+)"'
    header=self.header
    for i,name in enumerate(header):
        if rex.find(pattern,name):
            header[i]=rex[1]
89+
7390
def toDataArray(self):
7491
array=[]
7592
for row in self.matrix:

DataFrameRow.py

+18
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,31 @@
2525
# row.append(value)
2626
# row.print(handle)
2727
# newRow=row.clone()
28+
# row.log()
29+
# row.log2()
30+
# row.log10()
2831
#=========================================================================
2932

3033
class DataFrameRow:
3134
def __init__(self):
3235
self.label=""
3336
self.values=[]
3437

38+
def log(self):
39+
values=self.values
40+
for i in range(len(values)):
41+
values[i]=log(values[i])
42+
43+
def log2(self):
44+
values=self.values
45+
for i in range(len(values)):
46+
values[i]=log2(values[i])
47+
48+
def log10(self):
49+
values=self.values
50+
for i in range(len(values)):
51+
values[i]=log10(values[i])
52+
3553
def getRaw(self):
3654
return self.values
3755

SamRecord.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ def parseMDtag(self):
8282
if(rex.find("^(\d+)(.*)",md)):
8383
fields.append(rex[1])
8484
md=rex[2]
85-
elif(rex.find("^([ACGT])(.*)",md)):
85+
elif(rex.find("^([ACGTN])(.*)",md)):
8686
fields.append(rex[1])
8787
md=rex[2]
88-
elif(rex.find("^(\^[ACGT]+)(.*)",md)):
88+
elif(rex.find("^(\^[ACGTN]+)(.*)",md)):
8989
fields.append(rex[1])
9090
md=rex[2]
9191
else:

SummaryStats.py

+28
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
# sum=SummaryStats.sum(array)
1919
# r=SummaryStats.correlation(array1,array2)
2020
# m=SummaryStats.median(array)
21+
# array=SummaryStats.getQuantiles(values,numQuantiles)
22+
# (mean,SD,Min,Max)=SummaryStats.trimmedStats(array,percent)
2123
######################################################################
2224

2325
class SummaryStats:
@@ -36,6 +38,32 @@ def median(self,array):
3638
if(n%2==1): return a[halfN]
3739
return (a[halfN-1]+a[halfN])/2
3840

41+
@classmethod
42+
def getQuantiles(self,values,numQuantiles):
43+
a=[]
44+
for x in values: a.append(x)
45+
a.sort()
46+
n=len(a)
47+
q=[0]
48+
index=0
49+
for i in range(1,numQuantiles):
50+
index=int(float(i)/float(numQuantiles)*float(n))
51+
q.append(a[index])
52+
q.append(a[n-1])
53+
return q
54+
55+
@classmethod
def trimmedStats(self,array,percent):
    """Summarize the central portion of `array` after trimming both tails.

    `percent` is multiplied directly by len(array), so it is used as a
    fraction of the data to keep (e.g. 0.9 keeps roughly 90%).  The values
    are copied and sorted, half of the omitted amount is skipped at the low
    end, and the next int(percent*n) values are passed to
    SummaryStats.summaryStats, whose result is returned unchanged (the
    file's header comment lists it as (mean,SD,Min,Max)).
    """
    ordered=list(array)
    ordered.sort()
    total=len(array)
    numKept=percent*total
    numOmitted=total-numKept
    start=int(numOmitted/2)
    stop=start+int(numKept)
    return SummaryStats.summaryStats(ordered[start:stop])
66+
3967
@classmethod
4068
def summaryStats(self,array):
4169
n=len(array)

template.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
#!/usr/bin/env python
22
#=========================================================================
3-
# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
4-
# License (GPL) version 3, as described at www.opensource.org.
5-
# Author: William H. Majoros (bmajoros@alumni.duke.edu)
3+
# Copyright (C)William H. Majoros (bmajoros@alumni.duke.edu)
64
#=========================================================================
75
from __future__ import (absolute_import, division, print_function,
86
unicode_literals, generators, nested_scopes, with_statement)

0 commit comments

Comments
 (0)