Skip to content

Commit

Permalink
Adaptive Samplers (plus Dummy IO fix) (#459)
Browse files Browse the repository at this point in the history
* adaptive sparse grid, sobol working

* Adaptive sampling, plus Dummy-based rlz updates from input, output

* cleanup

* fixed prefix as numpy array
  • Loading branch information
PaulTalbot-INL authored and alfoa committed Dec 10, 2017
1 parent ddf3fc7 commit 9b17e91
Show file tree
Hide file tree
Showing 34 changed files with 224 additions and 473 deletions.
2 changes: 1 addition & 1 deletion framework/Models/Code.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ def collectOutputFromDict(self,exportDict,output,options=None):
@ In, options, dict, optional, dictionary of options that can be passed in when the collect of the output is performed by another model (e.g. EnsembleModel)
@ Out, None
"""
prefix = exportDict.pop('prefix')
prefix = exportDict.pop('prefix',None)
if 'inputSpaceParams' in exportDict.keys():
inKey = 'inputSpaceParams'
outKey = 'outputSpaceParams'
Expand Down
9 changes: 5 additions & 4 deletions framework/Models/Dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = self._manipulateInput(Input[0])
rlz = {}
rlz.update(inRun)
rlz.update(kwargs)
rlz['OutputPlaceHolder'] = np.atleast_1d(np.float(Input[1]['prefix']))
# build realization using input space from inRun and metadata from kwargs
rlz = dict((var,np.atlesat_1d(inRun[var] if var in kwargs['SampledVars'] else kwargs[var])) for var in set(kwargs.keys()+inRun.keys()))
# add dummy output space
rlz['OutputPlaceHolder'] = np.atleast_1d(float(Input[1]['prefix'][0]))
return rlz

def collectOutput(self,finishedJob,output,options=None):
Expand All @@ -185,6 +185,7 @@ def collectOutput(self,finishedJob,output,options=None):
# TODO apparently sometimes "options" can include 'exportDict'; what do we do for this?
# TODO consistency with old HDF5; fix this when HDF5 api is in place
# TODO expensive deepcopy prevents modification when sent to multiple outputs
result = finishedJob.getEvaluation()
self._replaceVariablesNamesWithAliasSystem(result)
if isinstance(result,Runners.Error):
self.raiseAnError(Runners.Error,'No available output to collect!')
Expand Down
10 changes: 6 additions & 4 deletions framework/Models/ExternalModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,13 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = copy.copy(self._manipulateInput(Input[0][0]))
# collect results from model run
result,instSelf = self._externalRun(inRun,Input[1],) #entry [1] is the external model object; it doesn't appear to be needed
rlz = {}
rlz.update(inRun)
rlz.update(result)
rlz.update(kwargs)
# build realization
# assure rlz has all metadata
rlz = dict((var,np.atleast_1d(kwargs[var])) for var in kwargs.keys())
# update rlz with input space from inRun and output space from result
rlz.update(dict((var,np.atleast_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(result.keys()+inRun.keys())))
return rlz

def collectOutput(self,finishedJob,output,options=None):
Expand Down
14 changes: 11 additions & 3 deletions framework/Models/ROM.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#External Modules------------------------------------------------------------------------------------
import copy
import inspect
import numpy as np
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
Expand Down Expand Up @@ -362,6 +363,9 @@ def evaluate(self,request):
"""
inputToROM = self._inputToInternal(request)
outputEvaluation = self.supervisedEngine.evaluate(inputToROM)
# assure numpy array formatting # TODO can this be done in the supervised engine instead?
for k,v in outputEvaluation.items():
outputEvaluation[k] = np.atleast_1d(v)
return outputEvaluation

def _externalRun(self,inRun):
Expand Down Expand Up @@ -390,9 +394,13 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = self._manipulateInput(Input[0])
rlz = self._externalRun(inRun)
rlz.update(inRun)
rlz.update(kwargs)
# collect results from model run
result = self._externalRun(inRun)
# build realization
# assure rlz has all metadata
rlz = dict((var,np.atleast_1d(kwargs[var]) for var in kwargs.keys())
# update rlz with input space from inRun and output space from result
rlz.update(dict((var,np.atlest_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(result.keys()+inRun.keys())))
return rlz

def reseed(self,seed):
Expand Down
6 changes: 3 additions & 3 deletions framework/Optimizers/Optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,8 @@ def initialize(self,externalSeeding=None,solutionExport=None):

self.mdlEvalHist = self.assemblerDict['TargetEvaluation'][0][3]
# check if the TargetEvaluation feature and target spaces are consistent
ins = self.mdlEvalHist.getParaKeys("inputs")
outs = self.mdlEvalHist.getParaKeys("outputs")
ins = self.mdlEvalHist.getVarss("inputs")
outs = self.mdlEvalHist.getVars("outputs")
for varName in self.fullOptVars:
if varName not in ins:
self.raiseAnError(RuntimeError,"the optimization variable "+varName+" is not contained in the TargetEvaluation object "+self.mdlEvalHist.name)
Expand Down Expand Up @@ -810,7 +810,7 @@ def getLossFunctionGivenId(self, evaluationID):
@ In, evaluationID, string, the evaluation identifier (prefix)
@ Out, functionValue, float, the loss function value
"""
objective = self.mdlEvalHist.getParametersValues('outputs', nodeId = 'RecontructEnding')[self.objVar]
objective = self.mdlEvalHist.getVars('output', nodeId = 'RecontructEnding')[self.objVar]
prefix = self.mdlEvalHist.getMetadata('prefix',nodeId='RecontructEnding')
if len(prefix) > 0 and utils.returnIdSeparator() in prefix[0]:
# ensemble model id modification
Expand Down
22 changes: 10 additions & 12 deletions framework/Samplers/AdaptiveSobol.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,10 @@ def _addPointToDataObject(self,subset,point):
"""
pointSet = self.samplers[subset].solns
#first, check if the output is in the subset's existing solution set already
inExisting = self.solns.getMatchingRealization(self._tupleToDict(self._expandCutPoint(subset,point)))
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(self._expandCutPoint(subset,point)))
#add the point to the data set.
for var in pointSet.getParaKeys('inputs'):
pointSet.updateInputValue(var,inExisting['inputs'][var])
for var in pointSet.getParaKeys('outputs'):
pointSet.updateOutputValue(var,inExisting['outputs'][var])
rlz = dict((var,np.atleast_1d(inExisting[var])) for var in pointSet.getVars())
pointSet.addRealization(rlz)

def _calcActualImpact(self,subset,target):
"""
Expand Down Expand Up @@ -811,8 +809,8 @@ def _makeSubsetRom(self,subset):
for inp in self.sorted:
if self._checkCutPoint(subset,inp):
#get the solution
inExisting = self.solns.getMatchingRealization(self._tupleToDict(inp))
soln = self._dictToTuple(inExisting['outputs'],output=True)
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(inp))
soln = self._dictToTuple(inExisting,output=True)
#get the cut point
cinp = self._extractCutPoint(subset,inp)
self._addPointToDataObject(subset,cinp)
Expand Down Expand Up @@ -886,7 +884,7 @@ def _retrieveNeededPoints(self,subset):
cutpt = sampler.neededPoints.pop()
fullPoint = self._expandCutPoint(subset,cutpt)
#if this point already in local existing, put it straight into collected and sampler existing
inExisting = self.solns.getMatchingRealization(self._tupleToDict(fullPoint))
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(fullPoint))
if inExisting is not None:
self.pointsCollected[subset].append(cutpt)
self._addPointToDataObject(subset,cutpt)
Expand All @@ -901,13 +899,13 @@ def _sortNewPoints(self):
@ Out, None
"""
#if there's no solutions in the set, no work to do
if self.solns.isItEmpty():
if len(self.solns) == 0:
return
#update self.existing for adaptive sobol sampler (this class)
for i in range(len(self.solns)):
existing = self.solns.getRealization(i)
inp = self._dictToTuple(existing['inputs'])
soln = self._dictToTuple(existing['outputs'],output=True)
existing = self.solns.realization(index=i)
inp = self._dictToTuple(existing)
soln = self._dictToTuple(existing,output=True)
#if point already sorted, don't re-do work
if inp not in self.submittedNotCollected:
continue
Expand Down
4 changes: 2 additions & 2 deletions framework/Samplers/AdaptiveSparseGrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def _addNewPoints(self,SG=None):
]:
self.pointsNeededToMakeROM.add(pt) #sets won't store redundancies
#if pt isn't already in needed, and it hasn't already been solved, add it to the queue
if pt not in self.neededPoints and self.solns.getMatchingRealization(self._tupleToDict(pt)) is None:
if pt not in self.neededPoints and self.solns.realization(matchDict=self._tupleToDict(pt))[1] is None:
self.newSolutionSizeShouldBe+=1
self.neededPoints.append(pt)

Expand Down Expand Up @@ -499,7 +499,7 @@ def _integrateFunction(self,sg,r,i):
tot=0
for n in range(len(sg)):
pt,wt = sg[n]
inExisting = self.solns.getMatchingRealization(self._tupleToDict(pt))
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(pt))
if inExisting is None:
self.raiseAnError(RuntimeError,'Trying to integrate with point',pt,'but it is not in the solutions!')
tot+=inExisting['outputs'][self.targets[i]]**r*wt
Expand Down
6 changes: 4 additions & 2 deletions framework/Samplers/Sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,9 @@ def generateInput(self,model,oldInput):
else:
inExisting = None
# reformat metadata into acceptable format for dataojbect
self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight'])
self.inputInfo['prefix'] = np.atleast_1d(self.inputInfo['prefix'])
# DO NOT format here, let that happen when a realization is made in collectOutput for each Model. Sampler doesn't care about this.
# self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight'])
# self.inputInfo['prefix'] = np.atleast_1d(self.inputInfo['prefix'])
#if not found or not restarting, we have a new point!
if inExisting is None:
self.raiseADebug('Found new point to sample:',self.values)
Expand All @@ -698,6 +699,7 @@ def generateInput(self,model,oldInput):
self.raiseADebug('Point found in restart!')
rlz = {}
# we've fixed it so the input and output space don't really matter, so use restartData's own definition
# DO format the data as atleast_1d so it's consistent in the ExternalModel for users (right?)
rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input'))
rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output'))
rlz['metadata'] = {'prefix':self.inputInfo['prefix'],
Expand Down
1 change: 1 addition & 0 deletions framework/Samplers/Sobol.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,4 +223,5 @@ def localGenerateInput(self,model,myInput):
self.inputInfo['ProbabilityWeight-'+varName.replace(",","!")] = self.inputInfo['SampledVarsPb'][varName]

self.inputInfo['PointProbability'] = reduce(mul,self.inputInfo['SampledVarsPb'].values())
self.inputInfo['ProbabilityWeight'] = np.atleast_1d(1.0) # weight has no meaning for sobol
self.inputInfo['SamplerType'] = 'Sparse Grids for Sobol'
49 changes: 35 additions & 14 deletions scripts/TestHarness/testers/UnorderedCSVDiffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ def __init__(self, test_dir, out_files,relative_error=1e-10,absolute_check=False
self.__check_absolute_values = absolute_check
self.__rel_err = relative_error

def finalizeMessage(self,same,msg,filename):
  """
    Compiles useful messages to print, prepending with file paths.
    @ In, same, bool, True if files are the same
    @ In, msg, list(str), messages that explain differences
    @ In, filename, str, test filename/path
    @ Out, None
  """
  # nothing to record when the files matched
  if same:
    return
  # flag the overall comparison as failed and append the per-file details
  self.__same = False
  diffReport = '\n '.join(msg)
  self.__message += '\nDIFF in {}: \n {}'.format(filename,diffReport)

def findRow(self,row,csv):
"""
Searches for "row" in "csv"
Expand All @@ -62,6 +74,9 @@ def diff(self):
"""
# read in files
for outFile in self.__out_files:
# local "same" and message list
same = True
msg = []
# load test file
testFilename = os.path.join(self.__test_dir,outFile)
try:
Expand All @@ -71,9 +86,8 @@ def diff(self):
testCSV = None
# if file doesn't exist, that's another problem
except IOError:
self.__same = False
self.__message += '\nTest file does not exist: '+testFilename
continue
msg.append('Test file does not exist!')
same = False
# load gold file
goldFilename = os.path.join(self.__test_dir, 'gold', outFile)
try:
Expand All @@ -83,31 +97,37 @@ def diff(self):
goldCSV = None
# if file doesn't exist, that's another problem
except IOError:
self.__same = False
self.__message += '\nGold file does not exist: '+goldFilename
msg.append('Gold file does not exist!')
same = False
# if either file did not exist, clean up and go to next outfile
if not same:
self.finalizeMessage(same,msg,testFilename)
continue
# at this point, we've loaded both files (even if they're empty), so compare them.
## first, cover the case when both files are empty.
if testCSV is None or goldCSV is None:
if not (testCSV is None and goldCSV is None):
self.__same = False
same = False
if testCSV is None:
self.__message += '\nTest file is empty, but Gold is not!'
msg.append('Test file is empty, but Gold is not!')
else:
self.__message += '\nGold file is empty, but Test is not!'
msg.append('Gold file is empty, but Test is not!')
# either way, move on to the next file, as no more comparison is needed
self.finalizeMessage(same,msg,testFilename)
continue
## at this point, both files have data loaded
## check columns using symmetric difference
diffColumns = set(goldCSV.columns)^set(testCSV.columns)
if len(diffColumns) > 0:
self.__same = False
self.__message += '\nColumns are not the same! Different: {}'.format(diffColumns)
same = False
msg.append('Columns are not the same! Different: {}'.format(', '.join(diffColumns)))
self.finalizeMessage(same,msg,testFilename)
continue
## check index length
if len(goldCSV.index) != len(testCSV.index):
self.__same = False
self.__message += 'Different number of entires in Gold ({}) versus Test ({})!'.format(len(goldCSV.index),len(testCSV.index))
same = False
msg.append('Different number of entires in Gold ({}) versus Test ({})!'.format(len(goldCSV.index),len(testCSV.index)))
self.finalizeMessage(same,msg,testFilename)
continue
## at this point both CSVs have the same shape, with the same header contents.
## align columns
Expand All @@ -117,8 +137,9 @@ def diff(self):
find = goldCSV.iloc[idx].rename(None)
match = self.findRow(find,testCSV)
if not len(match) > 0:
self.__same = False
self.__message += '\nCould not find match for row "{}" in Gold:\n{}'.format(idx+2,find) #+2 because of header row
same = False
msg.append('Could not find match for row "{}" in Gold:\n{}'.format(idx+1,find)) #+1 because of header row
# stop looking once a mismatch is found
break
self.finalizeMessage(same,msg,testFilename)
return self.__same, self.__message
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
x2,x1,ans
1.0,1.0,2.0
1.8,1.0,2.8
2.6,1.0,3.6
3.4,1.0,4.4
4.2,1.0,5.2
5.0,1.0,6.0
1.0,1.8,2.8
1.8,1.8,3.6
2.6,1.8,4.4
3.4,1.8,5.2
4.2,1.8,6.0
5.0,1.8,6.8
1.0,2.6,3.6
1.8,2.6,4.4
2.6,2.6,5.2
3.4,2.6,6.0
4.2,2.6,6.8
5.0,2.6,7.6
1.0,3.4,4.4
1.8,3.4,5.2
2.6,3.4,6.0
3.4,3.4,6.8
4.2,3.4,7.6
5.0,3.4,8.4
1.0,4.2,5.2
1.8,4.2,6.0
2.6,4.2,6.8
3.4,4.2,7.6
4.2,4.2,8.4
5.0,4.2,9.2
1.0,5.0,6.0
1.8,5.0,6.8
2.6,5.0,7.6
3.4,5.0,8.4
4.2,5.0,9.2
5.0,5.0,10.0
x1,x2,ans
1.0,1.0,1.999999999999986
1.0,1.8,2.7999999999999923
1.0,2.6000000000000001,3.5999999999999948
1.0,3.4000000000000004,4.3999999999999915
1.0,4.2000000000000002,5.199999999999986
1.0,5.0,5.9999999999999778
1.8,1.0,2.7999999999999927
1.8,1.8,3.6000000000000001
1.8,2.6000000000000001,4.400000000000003
1.8,3.4000000000000004,5.2000000000000028
1.8,4.2000000000000002,5.9999999999999973
1.8,5.0,6.7999999999999874
2.6000000000000001,1.0,3.5999999999999934
2.6000000000000001,1.8,4.400000000000003
2.6000000000000001,2.6000000000000001,5.2000000000000073
2.6000000000000001,3.4000000000000004,6.0000000000000062
2.6000000000000001,4.2000000000000002,6.8000000000000016
2.6000000000000001,5.0,7.5999999999999917
3.4000000000000004,1.0,4.3999999999999915
3.4000000000000004,1.8,5.200000000000002
3.4000000000000004,2.6000000000000001,6.0000000000000062
3.4000000000000004,3.4000000000000004,6.800000000000006
3.4000000000000004,4.2000000000000002,7.6000000000000005
3.4000000000000004,5.0,8.3999999999999897
4.2000000000000002,1.0,5.199999999999986
4.2000000000000002,1.8,5.9999999999999973
4.2000000000000002,2.6000000000000001,6.8000000000000016
4.2000000000000002,3.4000000000000004,7.6000000000000005
4.2000000000000002,4.2000000000000002,8.399999999999995
4.2000000000000002,5.0,9.1999999999999815
5.0,1.0,5.9999999999999778
5.0,1.8,6.7999999999999874
5.0,2.6000000000000001,7.5999999999999908
5.0,3.4000000000000004,8.3999999999999897
5.0,4.2000000000000002,9.1999999999999815
5.0,5.0,9.9999999999999662
Loading

0 comments on commit 9b17e91

Please sign in to comment.