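update: treat H as query-advancing in CigarOp; add DataFrame.removeQuotes(), DataFrameRow log/log2/log10, and SummaryStats.getQuantiles()/trimmedStats(); accept N in SamRecord MD-tag parsing; shorten the template.py license header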

Bill Majoros · Bill Majoros · commit ba353e9920c1 · 2021-05-07T13:50:19.000-04:00
diff --git a/CigarOp.py b/CigarOp.py
@@ -9,7 +9,7 @@
    chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
 from Interval import Interval
 
-ADVANCE_QUERY=set(["M","I","S","=","X"])
+# CIGAR operator codes treated as advancing the position in the query.
+# NOTE(review): the SAM specification lists "H" (hard clip) as consuming
+# neither query nor reference, because hard-clipped bases are absent from
+# SEQ -- confirm that counting "H" as query-advancing is intended here.
+ADVANCE_QUERY=set(["M","I","S","H","=","X"])
+# CIGAR operator codes treated as advancing the position in the reference.
 ADVANCE_REF=set(["M","D","N","=","X"])
 
 #=========================================================================
diff --git a/DataFrame.py b/DataFrame.py
@@ -9,11 +9,13 @@
    chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
 import sys
 from DataFrameRow import DataFrameRow
+from Rex import Rex
+rex=Rex()
 
 #=========================================================================
 # Attributes:
 #   header
-#   matrix : array of rows, each of which is an array of data values
+#   matrix : array of rows, each of which is a DataFrameRow
 #   rowHash : dictionary mapping row names to row indices
 #   colHash : dictionary mapping column names to column indices
 # Methods:
@@ -31,6 +33,7 @@
 #   df.toFloat()
 #   df.colToFloat(colIndex)
 #   header=df.getHeader()
+#   df.removeQuotes()
 #   df.hashRowNames()
 #   df.hashColNames()
 #   row=df.getRowI(i)
@@ -70,6 +73,20 @@ def addRow(self,row):
    def getRows(self):
       # Returns self.matrix itself (not a copy), so changes the caller
       # makes to the returned object are seen by this data frame.
       return self.matrix
 
+   def removeQuotes(self):
+      # Removes enclosing double quotes from the cells of every row and
+      # then from the header (via unquoteHeader), editing the stored
+      # lists in place.  A cell is replaced by capture group 1 only when
+      # the pattern matches it; all other cells are left untouched.
+      # Presumably rex.find() performs a regex search and rex[1] yields
+      # the first captured group -- confirm against the Rex class.
+      # The pattern is written as a raw string because "\s" and "\S" are
+      # not valid Python string escapes; the non-raw spelling produces
+      # the same pattern but raises an invalid-escape warning.
+      for row in self.matrix:
+         raw=row.getRaw()
+         for i,value in enumerate(raw):
+            if(rex.find(r'"\s*(\S+)"',value)):
+               raw[i]=rex[1]
+      self.unquoteHeader()
+
+   def unquoteHeader(self):
+      # Removes enclosing double quotes from each entry of self.header,
+      # in place.  An entry is replaced by capture group 1 only when the
+      # pattern matches it; other entries are left untouched.
+      # The pattern is written as a raw string because "\s" and "\S" are
+      # not valid Python string escapes; the non-raw spelling produces
+      # the same pattern but raises an invalid-escape warning.
+      header=self.header
+      for i,name in enumerate(header):
+         if(rex.find(r'"\s*(\S+)"',name)):
+            header[i]=rex[1]
+
    def toDataArray(self):
       array=[]
       for row in self.matrix:
diff --git a/DataFrameRow.py b/DataFrameRow.py
@@ -25,13 +25,31 @@
 #   row.append(value)
 #   row.print(handle)
 #   newRow=row.clone()
+#   row.log()
+#   row.log2()
+#   row.log10()
 #=========================================================================
 
 class DataFrameRow:
    def __init__(self):
       # A new row starts with an empty label and an empty list of values.
       self.label=""
       self.values=[]
 
+   def log(self):
+      # Replaces every value in this row with its natural logarithm, in
+      # place.  math.log is imported and called explicitly: a bare log()
+      # inside a method body is looked up as a module-level name, not as
+      # this method, so it only works if the module defines such a name.
+      # Raises ValueError (from math.log) for values <= 0; values already
+      # converted before the failing one stay converted.
+      import math
+      values=self.values
+      for i,x in enumerate(values):
+         values[i]=math.log(x)
+
+   def log2(self):
+      # Replaces every value in this row with its base-2 logarithm, in
+      # place.  math.log2 is imported and called explicitly: a bare
+      # log2() inside a method body is looked up as a module-level name,
+      # not as this method, so it only works if the module defines one.
+      # Raises ValueError (from math.log2) for values <= 0; values
+      # already converted before the failing one stay converted.
+      import math
+      values=self.values
+      for i,x in enumerate(values):
+         values[i]=math.log2(x)
+
+   def log10(self):
+      # Replaces every value in this row with its base-10 logarithm, in
+      # place.  math.log10 is imported and called explicitly: a bare
+      # log10() inside a method body is looked up as a module-level name,
+      # not as this method, so it only works if the module defines one.
+      # Raises ValueError (from math.log10) for values <= 0; values
+      # already converted before the failing one stay converted.
+      import math
+      values=self.values
+      for i,x in enumerate(values):
+         values[i]=math.log10(x)
+
    def getRaw(self):
       # Returns self.values itself (not a copy), so the caller can read
       # or modify this row's values through the returned object.
       return self.values
 
diff --git a/SamRecord.py b/SamRecord.py
@@ -82,10 +82,10 @@ def parseMDtag(self):
             if(rex.find("^(\d+)(.*)",md)):
                 fields.append(rex[1])
                 md=rex[2]
-            elif(rex.find("^([ACGT])(.*)",md)):
+            elif(rex.find("^([ACGTN])(.*)",md)):
                 fields.append(rex[1])
                 md=rex[2]
-            elif(rex.find("^(\^[ACGT]+)(.*)",md)):
+            elif(rex.find("^(\^[ACGTN]+)(.*)",md)):
                 fields.append(rex[1])
                 md=rex[2]
             else:
diff --git a/SummaryStats.py b/SummaryStats.py
@@ -18,6 +18,8 @@
 #    sum=SummaryStats.sum(array)
 #    r=SummaryStats.correlation(array1,array2)
 #    m=SummaryStats.median(array)
+#    array=SummaryStats.getQuantiles(values,numQuantiles)
+#    (mean,SD,Min,Max)=SummaryStats.trimmedStats(array,percent)
 ######################################################################
 
 class SummaryStats:
@@ -36,6 +38,32 @@ def median(self,array):
         if(n%2==1): return a[halfN]
         return (a[halfN-1]+a[halfN])/2
 
+    @classmethod
+    def getQuantiles(self,values,numQuantiles):
+        # Returns a list of quantile boundaries taken from a sorted copy
+        # of values (the input itself is not modified).  For
+        # numQuantiles>=1 the list has numQuantiles+1 entries: a leading
+        # 0, then the element at index int(k/numQuantiles*n) for
+        # k=1..numQuantiles-1, then the largest element.
+        # Raises IndexError when values is empty.
+        # NOTE(review): the first entry is the literal 0, not the
+        # smallest element -- confirm this is intended for data that can
+        # be negative or strictly positive.
+        ordered=sorted(values)
+        count=len(ordered)
+        interior=[ordered[int(float(k)/float(numQuantiles)*float(count))]
+                  for k in range(1,numQuantiles)]
+        return [0]+interior+[ordered[count-1]]
+
+    @classmethod
+    def trimmedStats(self,array,percent):
+        # Computes summary statistics over the middle portion of the
+        # sorted data.  percent multiplies the element count, so it acts
+        # as the fraction of elements to keep (e.g. 0.9 keeps 90%); the
+        # omitted elements are split between the two ends.  The input is
+        # not modified.  Returns whatever SummaryStats.summaryStats
+        # returns for the retained values (listed in the file header as
+        # (mean,SD,Min,Max)).
+        ordered=sorted(array)
+        total=len(array)
+        numKept=percent*total
+        start=int((total-numKept)/2)
+        stop=start+int(numKept)
+        return SummaryStats.summaryStats(ordered[start:stop])
+
     @classmethod
     def summaryStats(self,array):
         n=len(array)
diff --git a/template.py b/template.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
 #=========================================================================
-# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
-# License (GPL) version 3, as described at www.opensource.org.
-# Author: William H. Majoros (bmajoros@alumni.duke.edu)
+# Copyright (C)William H. Majoros (bmajoros@alumni.duke.edu)
 #=========================================================================
 from __future__ import (absolute_import, division, print_function, 
    unicode_literals, generators, nested_scopes, with_statement)