Skip to content

Commit

Permalink
Adaptive Samplers (plus Dummy IO fix) (#459)
Browse files Browse the repository at this point in the history
* adaptive sparse grid, sobol working

* Adaptive sampling, plus Dummy-based rlz updates from input, output

* cleanup

* fixed prefix as numpy array
  • Loading branch information
PaulTalbot-INL authored and alfoa committed Dec 10, 2017
1 parent ddf3fc7 commit 9b17e91
Show file tree
Hide file tree
Showing 34 changed files with 224 additions and 473 deletions.
2 changes: 1 addition & 1 deletion framework/Models/Code.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ def collectOutputFromDict(self,exportDict,output,options=None):
@ In, options, dict, optional, dictionary of options that can be passed in when the collect of the output is performed by another model (e.g. EnsembleModel)
@ Out, None
"""
prefix = exportDict.pop('prefix')
prefix = exportDict.pop('prefix',None)
if 'inputSpaceParams' in exportDict.keys():
inKey = 'inputSpaceParams'
outKey = 'outputSpaceParams'
Expand Down
9 changes: 5 additions & 4 deletions framework/Models/Dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = self._manipulateInput(Input[0])
rlz = {}
rlz.update(inRun)
rlz.update(kwargs)
rlz['OutputPlaceHolder'] = np.atleast_1d(np.float(Input[1]['prefix']))
# build realization using input space from inRun and metadata from kwargs
rlz = dict((var,np.atlesat_1d(inRun[var] if var in kwargs['SampledVars'] else kwargs[var])) for var in set(kwargs.keys()+inRun.keys()))
# add dummy output space
rlz['OutputPlaceHolder'] = np.atleast_1d(float(Input[1]['prefix'][0]))
return rlz

def collectOutput(self,finishedJob,output,options=None):
Expand All @@ -185,6 +185,7 @@ def collectOutput(self,finishedJob,output,options=None):
# TODO apparently sometimes "options" can include 'exportDict'; what do we do for this?
# TODO consistency with old HDF5; fix this when HDF5 api is in place
# TODO expensive deepcopy prevents modification when sent to multiple outputs
result = finishedJob.getEvaluation()
self._replaceVariablesNamesWithAliasSystem(result)
if isinstance(result,Runners.Error):
self.raiseAnError(Runners.Error,'No available output to collect!')
Expand Down
10 changes: 6 additions & 4 deletions framework/Models/ExternalModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,13 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = copy.copy(self._manipulateInput(Input[0][0]))
# collect results from model run
result,instSelf = self._externalRun(inRun,Input[1],) #entry [1] is the external model object; it doesn't appear to be needed
rlz = {}
rlz.update(inRun)
rlz.update(result)
rlz.update(kwargs)
# build realization
# assure rlz has all metadata
rlz = dict((var,np.atleast_1d(kwargs[var])) for var in kwargs.keys())
# update rlz with input space from inRun and output space from result
rlz.update(dict((var,np.atleast_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(result.keys()+inRun.keys())))
return rlz

def collectOutput(self,finishedJob,output,options=None):
Expand Down
14 changes: 11 additions & 3 deletions framework/Models/ROM.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#External Modules------------------------------------------------------------------------------------
import copy
import inspect
import numpy as np
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
Expand Down Expand Up @@ -362,6 +363,9 @@ def evaluate(self,request):
"""
inputToROM = self._inputToInternal(request)
outputEvaluation = self.supervisedEngine.evaluate(inputToROM)
# assure numpy array formatting # TODO can this be done in the supervised engine instead?
for k,v in outputEvaluation.items():
outputEvaluation[k] = np.atleast_1d(v)
return outputEvaluation

def _externalRun(self,inRun):
Expand Down Expand Up @@ -390,9 +394,13 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = self._manipulateInput(Input[0])
rlz = self._externalRun(inRun)
rlz.update(inRun)
rlz.update(kwargs)
# collect results from model run
result = self._externalRun(inRun)
# build realization
# assure rlz has all metadata
rlz = dict((var,np.atleast_1d(kwargs[var]) for var in kwargs.keys())
# update rlz with input space from inRun and output space from result
rlz.update(dict((var,np.atlest_1d(inRun[var] if var in kwargs['SampledVars'] else result[var])) for var in set(result.keys()+inRun.keys())))
return rlz

def reseed(self,seed):
Expand Down
6 changes: 3 additions & 3 deletions framework/Optimizers/Optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,8 +480,8 @@ def initialize(self,externalSeeding=None,solutionExport=None):

self.mdlEvalHist = self.assemblerDict['TargetEvaluation'][0][3]
# check if the TargetEvaluation feature and target spaces are consistent
ins = self.mdlEvalHist.getParaKeys("inputs")
outs = self.mdlEvalHist.getParaKeys("outputs")
ins = self.mdlEvalHist.getVarss("inputs")
outs = self.mdlEvalHist.getVars("outputs")
for varName in self.fullOptVars:
if varName not in ins:
self.raiseAnError(RuntimeError,"the optimization variable "+varName+" is not contained in the TargetEvaluation object "+self.mdlEvalHist.name)
Expand Down Expand Up @@ -810,7 +810,7 @@ def getLossFunctionGivenId(self, evaluationID):
@ In, evaluationID, string, the evaluation identifier (prefix)
@ Out, functionValue, float, the loss function value
"""
objective = self.mdlEvalHist.getParametersValues('outputs', nodeId = 'RecontructEnding')[self.objVar]
objective = self.mdlEvalHist.getVars('output', nodeId = 'RecontructEnding')[self.objVar]
prefix = self.mdlEvalHist.getMetadata('prefix',nodeId='RecontructEnding')
if len(prefix) > 0 and utils.returnIdSeparator() in prefix[0]:
# ensemble model id modification
Expand Down
22 changes: 10 additions & 12 deletions framework/Samplers/AdaptiveSobol.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,12 +411,10 @@ def _addPointToDataObject(self,subset,point):
"""
pointSet = self.samplers[subset].solns
#first, check if the output is in the subset's existing solution set already
inExisting = self.solns.getMatchingRealization(self._tupleToDict(self._expandCutPoint(subset,point)))
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(self._expandCutPoint(subset,point)))
#add the point to the data set.
for var in pointSet.getParaKeys('inputs'):
pointSet.updateInputValue(var,inExisting['inputs'][var])
for var in pointSet.getParaKeys('outputs'):
pointSet.updateOutputValue(var,inExisting['outputs'][var])
rlz = dict((var,np.atleast_1d(inExisting[var])) for var in pointSet.getVars())
pointSet.addRealization(rlz)

def _calcActualImpact(self,subset,target):
"""
Expand Down Expand Up @@ -811,8 +809,8 @@ def _makeSubsetRom(self,subset):
for inp in self.sorted:
if self._checkCutPoint(subset,inp):
#get the solution
inExisting = self.solns.getMatchingRealization(self._tupleToDict(inp))
soln = self._dictToTuple(inExisting['outputs'],output=True)
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(inp))
soln = self._dictToTuple(inExisting,output=True)
#get the cut point
cinp = self._extractCutPoint(subset,inp)
self._addPointToDataObject(subset,cinp)
Expand Down Expand Up @@ -886,7 +884,7 @@ def _retrieveNeededPoints(self,subset):
cutpt = sampler.neededPoints.pop()
fullPoint = self._expandCutPoint(subset,cutpt)
#if this point already in local existing, put it straight into collected and sampler existing
inExisting = self.solns.getMatchingRealization(self._tupleToDict(fullPoint))
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(fullPoint))
if inExisting is not None:
self.pointsCollected[subset].append(cutpt)
self._addPointToDataObject(subset,cutpt)
Expand All @@ -901,13 +899,13 @@ def _sortNewPoints(self):
@ Out, None
"""
#if there's no solutions in the set, no work to do
if self.solns.isItEmpty():
if len(self.solns) == 0:
return
#update self.existing for adaptive sobol sampler (this class)
for i in range(len(self.solns)):
existing = self.solns.getRealization(i)
inp = self._dictToTuple(existing['inputs'])
soln = self._dictToTuple(existing['outputs'],output=True)
existing = self.solns.realization(index=i)
inp = self._dictToTuple(existing)
soln = self._dictToTuple(existing,output=True)
#if point already sorted, don't re-do work
if inp not in self.submittedNotCollected:
continue
Expand Down
4 changes: 2 additions & 2 deletions framework/Samplers/AdaptiveSparseGrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def _addNewPoints(self,SG=None):
]:
self.pointsNeededToMakeROM.add(pt) #sets won't store redundancies
#if pt isn't already in needed, and it hasn't already been solved, add it to the queue
if pt not in self.neededPoints and self.solns.getMatchingRealization(self._tupleToDict(pt)) is None:
if pt not in self.neededPoints and self.solns.realization(matchDict=self._tupleToDict(pt))[1] is None:
self.newSolutionSizeShouldBe+=1
self.neededPoints.append(pt)

Expand Down Expand Up @@ -499,7 +499,7 @@ def _integrateFunction(self,sg,r,i):
tot=0
for n in range(len(sg)):
pt,wt = sg[n]
inExisting = self.solns.getMatchingRealization(self._tupleToDict(pt))
_,inExisting = self.solns.realization(matchDict=self._tupleToDict(pt))
if inExisting is None:
self.raiseAnError(RuntimeError,'Trying to integrate with point',pt,'but it is not in the solutions!')
tot+=inExisting['outputs'][self.targets[i]]**r*wt
Expand Down
6 changes: 4 additions & 2 deletions framework/Samplers/Sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,8 +678,9 @@ def generateInput(self,model,oldInput):
else:
inExisting = None
# reformat metadata into acceptable format for dataojbect
self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight'])
self.inputInfo['prefix'] = np.atleast_1d(self.inputInfo['prefix'])
# DO NOT format here, let that happen when a realization is made in collectOutput for each Model. Sampler doesn't care about this.
# self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight'])
# self.inputInfo['prefix'] = np.atleast_1d(self.inputInfo['prefix'])
#if not found or not restarting, we have a new point!
if inExisting is None:
self.raiseADebug('Found new point to sample:',self.values)
Expand All @@ -698,6 +699,7 @@ def generateInput(self,model,oldInput):
self.raiseADebug('Point found in restart!')
rlz = {}
# we've fixed it so the input and output space don't really matter, so use restartData's own definition
# DO format the data as atleast_1d so it's consistent in the ExternalModel for users (right?)
rlz['inputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('input'))
rlz['outputs'] = dict((var,np.atleast_1d(inExisting[var])) for var in self.restartData.getVars('output'))
rlz['metadata'] = {'prefix':self.inputInfo['prefix'],
Expand Down
1 change: 1 addition & 0 deletions framework/Samplers/Sobol.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,4 +223,5 @@ def localGenerateInput(self,model,myInput):
self.inputInfo['ProbabilityWeight-'+varName.replace(",","!")] = self.inputInfo['SampledVarsPb'][varName]

self.inputInfo['PointProbability'] = reduce(mul,self.inputInfo['SampledVarsPb'].values())
self.inputInfo['ProbabilityWeight'] = np.atleast_1d(1.0) # weight has no meaning for sobol
self.inputInfo['SamplerType'] = 'Sparse Grids for Sobol'
49 changes: 35 additions & 14 deletions scripts/TestHarness/testers/UnorderedCSVDiffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ def __init__(self, test_dir, out_files,relative_error=1e-10,absolute_check=False
self.__check_absolute_values = absolute_check
self.__rel_err = relative_error

def finalizeMessage(self,same,msg,filename):
  """
    Compiles useful messages to print, prepending with file paths.
    @ In, same, bool, True if files are the same
    @ In, msg, list(str), messages that explain differences
    @ In, filename, str, test filename/path
    @ Out, None
  """
  # nothing to record when the files matched
  if same:
    return
  # flag the overall comparison as failed and append the per-file details
  self.__same = False
  diffReport = '\n '.join(msg)
  self.__message += '\nDIFF in {}: \n {}'.format(filename,diffReport)

def findRow(self,row,csv):
"""
Searches for "row" in "csv"
Expand All @@ -62,6 +74,9 @@ def diff(self):
"""
# read in files
for outFile in self.__out_files:
# local "same" and message list
same = True
msg = []
# load test file
testFilename = os.path.join(self.__test_dir,outFile)
try:
Expand All @@ -71,9 +86,8 @@ def diff(self):
testCSV = None
# if file doesn't exist, that's another problem
except IOError:
self.__same = False
self.__message += '\nTest file does not exist: '+testFilename
continue
msg.append('Test file does not exist!')
same = False
# load gold file
goldFilename = os.path.join(self.__test_dir, 'gold', outFile)
try:
Expand All @@ -83,31 +97,37 @@ def diff(self):
goldCSV = None
# if file doesn't exist, that's another problem
except IOError:
self.__same = False
self.__message += '\nGold file does not exist: '+goldFilename
msg.append('Gold file does not exist!')
same = False
# if either file did not exist, clean up and go to next outfile
if not same:
self.finalizeMessage(same,msg,testFilename)
continue
# at this point, we've loaded both files (even if they're empty), so compare them.
## first, cover the case when both files are empty.
if testCSV is None or goldCSV is None:
if not (testCSV is None and goldCSV is None):
self.__same = False
same = False
if testCSV is None:
self.__message += '\nTest file is empty, but Gold is not!'
msg.append('Test file is empty, but Gold is not!')
else:
self.__message += '\nGold file is empty, but Test is not!'
msg.append('Gold file is empty, but Test is not!')
# either way, move on to the next file, as no more comparison is needed
self.finalizeMessage(same,msg,testFilename)
continue
## at this point, both files have data loaded
## check columns using symmetric difference
diffColumns = set(goldCSV.columns)^set(testCSV.columns)
if len(diffColumns) > 0:
self.__same = False
self.__message += '\nColumns are not the same! Different: {}'.format(diffColumns)
same = False
msg.append('Columns are not the same! Different: {}'.format(', '.join(diffColumns)))
self.finalizeMessage(same,msg,testFilename)
continue
## check index length
if len(goldCSV.index) != len(testCSV.index):
self.__same = False
self.__message += 'Different number of entires in Gold ({}) versus Test ({})!'.format(len(goldCSV.index),len(testCSV.index))
same = False
msg.append('Different number of entires in Gold ({}) versus Test ({})!'.format(len(goldCSV.index),len(testCSV.index)))
self.finalizeMessage(same,msg,testFilename)
continue
## at this point both CSVs have the same shape, with the same header contents.
## align columns
Expand All @@ -117,8 +137,9 @@ def diff(self):
find = goldCSV.iloc[idx].rename(None)
match = self.findRow(find,testCSV)
if not len(match) > 0:
self.__same = False
self.__message += '\nCould not find match for row "{}" in Gold:\n{}'.format(idx+2,find) #+2 because of header row
same = False
msg.append('Could not find match for row "{}" in Gold:\n{}'.format(idx+1,find)) #+1 because of header row
# stop looking once a mismatch is found
break
self.finalizeMessage(same,msg,testFilename)
return self.__same, self.__message
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
x2,x1,ans
1.0,1.0,2.0
1.8,1.0,2.8
2.6,1.0,3.6
3.4,1.0,4.4
4.2,1.0,5.2
5.0,1.0,6.0
1.0,1.8,2.8
1.8,1.8,3.6
2.6,1.8,4.4
3.4,1.8,5.2
4.2,1.8,6.0
5.0,1.8,6.8
1.0,2.6,3.6
1.8,2.6,4.4
2.6,2.6,5.2
3.4,2.6,6.0
4.2,2.6,6.8
5.0,2.6,7.6
1.0,3.4,4.4
1.8,3.4,5.2
2.6,3.4,6.0
3.4,3.4,6.8
4.2,3.4,7.6
5.0,3.4,8.4
1.0,4.2,5.2
1.8,4.2,6.0
2.6,4.2,6.8
3.4,4.2,7.6
4.2,4.2,8.4
5.0,4.2,9.2
1.0,5.0,6.0
1.8,5.0,6.8
2.6,5.0,7.6
3.4,5.0,8.4
4.2,5.0,9.2
5.0,5.0,10.0
x1,x2,ans
1.0,1.0,1.999999999999986
1.0,1.8,2.7999999999999923
1.0,2.6000000000000001,3.5999999999999948
1.0,3.4000000000000004,4.3999999999999915
1.0,4.2000000000000002,5.199999999999986
1.0,5.0,5.9999999999999778
1.8,1.0,2.7999999999999927
1.8,1.8,3.6000000000000001
1.8,2.6000000000000001,4.400000000000003
1.8,3.4000000000000004,5.2000000000000028
1.8,4.2000000000000002,5.9999999999999973
1.8,5.0,6.7999999999999874
2.6000000000000001,1.0,3.5999999999999934
2.6000000000000001,1.8,4.400000000000003
2.6000000000000001,2.6000000000000001,5.2000000000000073
2.6000000000000001,3.4000000000000004,6.0000000000000062
2.6000000000000001,4.2000000000000002,6.8000000000000016
2.6000000000000001,5.0,7.5999999999999917
3.4000000000000004,1.0,4.3999999999999915
3.4000000000000004,1.8,5.200000000000002
3.4000000000000004,2.6000000000000001,6.0000000000000062
3.4000000000000004,3.4000000000000004,6.800000000000006
3.4000000000000004,4.2000000000000002,7.6000000000000005
3.4000000000000004,5.0,8.3999999999999897
4.2000000000000002,1.0,5.199999999999986
4.2000000000000002,1.8,5.9999999999999973
4.2000000000000002,2.6000000000000001,6.8000000000000016
4.2000000000000002,3.4000000000000004,7.6000000000000005
4.2000000000000002,4.2000000000000002,8.399999999999995
4.2000000000000002,5.0,9.1999999999999815
5.0,1.0,5.9999999999999778
5.0,1.8,6.7999999999999874
5.0,2.6000000000000001,7.5999999999999908
5.0,3.4000000000000004,8.3999999999999897
5.0,4.2000000000000002,9.1999999999999815
5.0,5.0,9.9999999999999662
Loading

0 comments on commit 9b17e91

Please sign in to comment.