update

Bill Majoros · Bill Majoros · commit 322416a5f6b2 · 2020-06-03T13:15:00.000-04:00
diff --git a/DataFrame.py b/DataFrame.py
@@ -172,7 +172,9 @@ def nrow(self):
       return len(self.matrix)
 
    def ncol(self):
-      return len(self.header)
+      if(len(self.header)!=0): return len(self.header) # a non-empty header defines the column count
+      if(len(self.matrix)==0): return 0 # no header and no rows
+      return self.matrix[0].length() # NOTE(review): assumes row objects provide length() - confirm
 
    def __getitem__(self,i):
       return self.matrix[i]
diff --git a/MatrixMarket.py b/MatrixMarket.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+#=========================================================================
+# This is OPEN SOURCE SOFTWARE governed by the Gnu General Public
+# License (GPL) version 3, as described at www.opensource.org.
+# Author: William H. Majoros (bmajoros@alumni.duke.edu)
+#=========================================================================
+from __future__ import (absolute_import, division, print_function, 
+   unicode_literals, generators, nested_scopes, with_statement)
+from builtins import (bytes, dict, int, list, object, range, str, ascii,
+   chr, hex, input, next, oct, open, pow, round, super, filter, map, zip)
+# The above imports should allow this program to run in both Python 2 and
+# Python 3.  You might need to update your version of module "future".
+import sys
+import ProgramName
+import gzip
+from Rex import Rex
+rex=Rex()
+
+#=========================================================================
+# Attributes:
+#    FH : file handle
+#    header : array of string
+#    nextLine : string
+# Instance Methods:
+#    MatrixMarket(filename)
+#    group=nextGroup(colIndex)
+#    getHeader()
+# Class Methods:
+#    allGroups=loadFile(filename,colIndex)
+#=========================================================================
+class MatrixMarket:
+    def __init__(self,filename):
+        # A ".gz" suffix selects transparent gzip decompression
+        if(rex.find(r"\.gz$",filename)):
+            self.FH=gzip.open(filename,"rt")
+        else:
+            self.FH=open(filename,"rt")
+        self.header=None   # totals line as a list of int, once parsed
+        self.nextLine=None # first line of the next group, if read ahead
+
+    def getHeader(self):
+        return self.header
+
+    def _readDataLine(self):
+        # Returns the next line that is neither blank nor a "%" comment, or
+        # None at end of file (readline() signals EOF with "", never None)
+        while(True):
+            line=self.FH.readline()
+            if(line==""): return None
+            if(line[0]!="%" and line.strip()!=""): return line
+
+    def nextGroup(self,colIndex):
+        # Returns the next run of consecutive rows sharing the same integer
+        # in column colIndex (each row a list of string fields), or None
+        # once the file is exhausted; the last group is returned at EOF
+        if(self.header is None): # first data line contains the totals
+            line=self._readDataLine()
+            if(line is None): return None
+            self.header=[int(x) for x in line.rstrip().split()]
+        line=self.nextLine # read ahead by the previous call, if any
+        self.nextLine=None
+        if(line is None): line=self._readDataLine()
+        if(line is None): return None
+        group=[]
+        groupID=None
+        while(line is not None):
+            fields=line.rstrip().split()
+            thisID=int(fields[colIndex])
+            if(groupID is None): groupID=thisID
+            if(thisID!=groupID): # start of the next group: save it
+                self.nextLine=line
+                break
+            group.append(fields)
+            line=self._readDataLine()
+        return group
+
+    @classmethod
+    def loadFile(cls,filename,colIndex):
+        # Returns a list of all groups in the file, closing it afterwards
+        reader=MatrixMarket(filename)
+        try: return list(iter(lambda: reader.nextGroup(colIndex),None))
+        finally: reader.FH.close()
diff --git a/Pipe.py b/Pipe.py
@@ -16,6 +16,7 @@
 # Instance Methods:
 #   pipe=Pipe(command)
 #   line=pipe.readline()
+# Class Methods:
 #   output=Pipe.run(command)
 #=========================================================================
 class Pipe:
diff --git a/Stan.py b/Stan.py
@@ -17,7 +17,14 @@
 #    
 # Methods:
 #    stan=Stan(model)
-#    stan.run(numWarmup,numSamples,inputFile,outputFile,stderrFile,initFile=None):
+#    stan.run(numWarmup,numSamples,inputFile,outputFile,stderrFile,
+#         initFile=None):
+#    stan.variational(numSamples,inputFile,outputFile,stderrFile,
+#         initFile=None):
+#    cmd=stan.getCmd(numWarmup,numSamples,inputFile,outputFile,
+#         stderrFile,initFile=None)
+#    cmd=stan.getVarCmd(numSamples,inputFile,outputFile,
+#         stderrFile,initFile=None)
 #    stan.writeOneDimArray(name,array,dim,OUT):
 #    stan.writeTwoDimArray(name,array,firstDim,secondDim,OUT):
 #    stan.writeThreeDimArray(name,array,firstDim,secondDim,thirdDim,OUT):
@@ -82,6 +89,10 @@ def run(self,numWarmup,numSamples,inputFile,outputFile,stderrFile,initFile=None)
         cmd=self.getCmd(numWarmup,numSamples,inputFile,outputFile,stderrFile,initFile)
         os.system(cmd)
 
+    def variational(self,numSamples,inputFile,outputFile,stderrFile,initFile=None):
+        # Runs the variational command through the shell; exit status is not checked
+        os.system(self.getVarCmd(numSamples,inputFile,outputFile,stderrFile,initFile))
+
     def getCmd(self,numWarmup,numSamples,inputFile,outputFile,stderrFile,initFile=None):
         init=" init="+initFile if initFile is not None else ""
         cmd=self.model+" sample thin=1"+\
@@ -91,4 +102,13 @@ def getCmd(self,numWarmup,numSamples,inputFile,outputFile,stderrFile,initFile=No
             init+\
             " output file="+outputFile+" refresh=0 > "+stderrFile
         return cmd
+
+    def getVarCmd(self,numSamples,inputFile,outputFile,stderrFile,initFile=None):
+        # Builds the shell command line for a variational run of the model;
+        # the command's stdout is redirected into stderrFile
+        initArg="" if initFile is None else " init="+initFile
+        parts=[self.model," variational "," output_samples=",str(numSamples),
+               " data file=",inputFile,initArg,
+               " output file=",outputFile," refresh=0 > ",stderrFile]
+        return "".join(parts)
         
diff --git a/StanParser.py b/StanParser.py
@@ -25,6 +25,8 @@
 #    (median,mean,SD,min,max)=parser.getSummary(var)
 #    (CI_left,CI_right)=parser.getCredibleInterval(0.95,variableName)
 #    (median,CI_left,CI_right)=parser.getMedianAndCI(0.95,variableName)
+#    P=parser.getLeftTail(variableName,value)
+#    P=parser.getRightTail(variableName,value)
 ######################################################################
 
 class StanParser:
@@ -43,6 +45,20 @@ def getCredibleInterval(self,percent,name):
         CI_right=samples[n-int(half*n)]
         return (CI_left,CI_right)
 
+    def getLeftTail(self,name,value):
+        # Fraction of the samples of variable `name` that are strictly
+        # less than `value`
+        samples=self.getVariable(name)
+        numBelow=sum(1 for x in samples if x<value)
+        return float(numBelow)/float(len(samples))
+
+    def getRightTail(self,name,value):
+        # Fraction of the samples of variable `name` that are strictly
+        # greater than `value`
+        samples=self.getVariable(name)
+        numAbove=sum(1 for x in samples if x>value)
+        return float(numAbove)/float(len(samples))
+
     def getMedianAndCI(self,percent,name):
         samples=self.getVariable(name)
         n=len(samples)
@@ -58,23 +74,36 @@ def parse(self,filename):
             return self.parseFile(IN)
 
     def parseFile(self,IN):
+        firstIndex=None # stays None until the lp__ header row is seen
         for line in IN:
             if(len(line)<1): continue
             if(line[0]=="#"): continue
             fields=line.rstrip().split(",")
             if(len(fields)<1): continue
-            if(fields[0]=="lp__"): self.parseVarNames(fields)
-            else: self.parseSample(fields)
+            if(fields[0]=="lp__"): # header row of column names
+                firstIndex=self.getFirstVariableIndex(fields)
+                self.parseVarNames(fields,firstIndex)
+            else: self.parseSample(fields,firstIndex) # any other row is a sample
+
+    def getFirstVariableIndex(self,fields):
+        # Index of the first column whose name does not end in "_" (names
+        # such as lp__ do), or -1 if there is none.  Note fields[-1:] is the
+        # last column, so callers that slice must check for -1 first.
+        for i,field in enumerate(fields):
+            if(not field.endswith("_")): return i
+        return -1
 
-    def parseVarNames(self,fields):
-        self.varNames=fields[7:]
+    def parseVarNames(self,fields,firstIndex):
+        self.varNames=fields[firstIndex:]
+        # Record each variable name's position within varNames
         for i in range(len(self.varNames)):
             self.varIndex[self.varNames[i]]=i
 
-    def parseSample(self,fields):
-        sample=fields[7:]
+    def parseSample(self,fields,firstIndex):
+        sample=fields[firstIndex:]
         for i in range(len(sample)):
             sample[i]=float(sample[i])
+            # (each value is converted to float in place)
         self.samples.append(sample)
 
     def getSamples(self):