Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Talbpaul/register metadata #443

Merged
merged 8 commits into from
Nov 28, 2017
17 changes: 17 additions & 0 deletions framework/BaseClasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self):
self.printTag = 'BaseType' # the tag that refers to this class in all the specific printing
self.messageHandler = None # message handling object
self.variableGroups = {} # the variables this class needs to be aware of
self.metadataKeys = set() # list of registered metadata keys to expect from this entity
self.mods = utils.returnImportModuleString(inspect.getmodule(BaseType)) #list of modules this class depends on (needed for automatic parallel python)
for baseClass in self.__class__.__mro__:
self.mods.extend(utils.returnImportModuleString(inspect.getmodule(baseClass),True))
Expand Down Expand Up @@ -240,3 +241,19 @@ def printMe(self):
self.raiseADebug(' Current Setting:')
for key in tempDict.keys():
self.raiseADebug(' {0:15}: {1}'.format(key,str(tempDict[key])))

def provideExpectedMetaKeys(self):
  """
    Provides the registered metadata keys for this entity.
    Note the return is a set, not a list: keys are registered via
    addMetaKeys using set union, so duplicates are never present.
    @ In, None
    @ Out, metadataKeys, set(str), expected keys (empty set if none registered)
  """
  return self.metadataKeys

def addMetaKeys(self,*args):
  """
    Registers keywords in the set of expected metadata keys.
    Reassigns (rather than mutates) self.metadataKeys, so sets previously
    handed out by provideExpectedMetaKeys are not changed retroactively.
    @ In, args, list(str), keywords to register
    @ Out, None
  """
  self.metadataKeys = self.metadataKeys | set(args)
91 changes: 55 additions & 36 deletions framework/DataObjects/XDataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,31 +59,23 @@ class DataSet(DataObject):
"""
### EXTERNAL API ###
# These are the methods that RAVEN entities should call to interact with the data object
def addExpectedMeta(self,keys):
  """
    Registers meta to look for in realizations.
    Must be called before any samples are collected, since the data
    layout (self._allvars ordering) is fixed once collection starts.
    @ In, keys, set(str), keys to register
    @ Out, None
  """
  # TODO add option to skip parts of meta if user wants to
  # remove already existing keys
  keys = [key for key in keys if key not in self._allvars]
  # if no new meta, move along
  if not keys:
    return
  # CANNOT add expected meta after samples are started
  assert(self._data is None)
  assert(self._collector is None or len(self._collector) == 0)
  self._metavars.extend(keys)
  self._allvars.extend(keys)

def addMeta(self,tag,xmlDict):
"""
def addRealization(self,rlz):
  """
    Adds a "row" (or "sample") to this data object.
    @ In, rlz, dict, realization to add as {var:val}, where
      "val" is either a float or a np.ndarray of values.
    @ Out, None
  """
  # protect against back-changing realization
  rlz = copy.deepcopy(rlz)
  # clean out entries that aren't desired
  try:
    rlz = dict((var,rlz[var]) for var in self._allvars+self.indexes)
  except KeyError as e:
    self.raiseAnError(KeyError,'Provided realization does not have all requisite values: "{}"'.format(e.args[0]))
  # check consistency; the checker raises warnings describing any problem found
  if not self._checkRealizationFormat(rlz):
    self.raiseAnError(SyntaxError,'Realization was not formatted correctly! See warnings above.')
  # format the data
  rlz = self._formatRealization(rlz)
  # perform selective collapsing/picking of data
  rlz = self._selectiveRealization(rlz)
  # check and order data to be stored; all requisite keys were verified above
  newData = np.asarray([list(rlz[var] for var in self._allvars)],dtype=object)
  # if data storage isn't set up, set it up
  if self._collector is None:
    self._collector = self._newCollector(width=len(rlz))
  # append
  self._collector.append(newData)
  # reset scaling factors, kd tree
  self._resetScaling()

def addVariable(self,varName,values,classify='meta'):
  """
    Adds a variable/column to the data. "values" needs to be as long as self.size.
    @ In, varName, str, name of new variable
    @ In, values, np.array, new values (floats/str for scalars, xr.DataArray for hists)
    @ In, classify, str, optional, either 'input', 'output', or 'meta'
    @ Out, None
  """
  assert(isinstance(values,np.ndarray))
  assert(len(values) == self.size)
  assert(classify in ['input','output','meta'])
  # collapse any outstanding collected entries before appending the column
  self.asDataset()
  # build a single data array for the new column
  # TODO worry about sampleTag values?
  newColumn = self._collapseNDtoDataArray(values,varName,labels=self._data[self.sampleTag])
  # merge the column into the dataset
  self._data = self._data.assign(**{varName:newColumn})
  # record the variable under the requested classification
  if classify == 'input':
    destination = self._inputs
  elif classify == 'output':
    destination = self._outputs
  else:
    destination = self._metavars
  destination.append(varName)
  self._allvars.append(varName)

def asDataset(self):
"""
Casts this dataobject as an xr.Dataset.
Expand Down Expand Up @@ -290,7 +309,7 @@ def getMeta(self,keys=None,pointwise=False,general=False):
for var in self._metavars:
if var in pKeys:
# TODO if still collecting, an option to NOT call asDataset
meta[var] = self.asDataset()[var]#[self._allvars.index(var),:]
meta[var] = self.asDataset()[var]
if general:
meta.update(dict((key,self._meta[key]) for key in gKeys))
return meta
Expand Down Expand Up @@ -1044,7 +1063,7 @@ def _setScalingFactors(self):
for var in self._allvars:
## commented code. We use a try now for speed. It probably needs to be modified for ND arrays
# if not a float or int, don't scale it
# TODO this check is pretty convoluted; there's probably a better way to figure out the type of the variable
# TODO this check is pretty convoluted; there's probably a better way to figure out the type of the variable
#first = self._data.groupby(var).first()[var].item(0)
#if (not isinstance(first,(float,int))) or np.isnan(first):# or self._data[var].isnull().all():
# continue
Expand Down
1 change: 1 addition & 0 deletions framework/Models/Dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def evaluateSample(self, myInput, samplerType, kwargs):
inRun = self._manipulateInput(Input[0])
rlz = {}
rlz.update(inRun)
rlz.update(kwargs)
rlz['OutputPlaceHolder'] = np.atleast_1d(np.float(Input[1]['prefix']))
return rlz

Expand Down
3 changes: 2 additions & 1 deletion framework/Models/ExternalModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,11 @@ def evaluateSample(self, myInput, samplerType, kwargs):
"""
Input = self.createNewInput(myInput, samplerType, **kwargs)
inRun = copy.copy(self._manipulateInput(Input[0][0]))
result,instSelf = self._externalRun(inRun,Input[1],) # TODO entry [1] is the external model object; do I need it?
result,instSelf = self._externalRun(inRun,Input[1],) #entry [1] is the external model object; it doesn't appear to be needed
rlz = {}
rlz.update(inRun)
rlz.update(result)
rlz.update(kwargs)
return rlz

def collectOutput(self,finishedJob,output,options=None):
Expand Down
9 changes: 9 additions & 0 deletions framework/Samplers/Sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import copy
import abc
import json
import numpy as np
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
Expand Down Expand Up @@ -590,6 +591,11 @@ def initialize(self,externalSeeding=None,solutionExport=None):
self.inputInfo['transformation-'+distName] = transformDict
self.entitiesToRemove.append('transformation-'+distName)

# Register expected metadata
meta = ['ProbabilityWeight']
# TODO more meta needs to be added, this is just for testing so far.
self.addMetaKeys(*meta)

def localInitialize(self):
"""
use this function to add initialization features to the derived class
Expand Down Expand Up @@ -696,6 +702,9 @@ def generateInput(self,model,oldInput):
## a copy of the information, otherwise we have to be careful to create a
## deep copy of this information when we submit it to a job).
## -- DPM 4/18/17
# reformat metadata into acceptable format for data object
# TODO do it by meta key, as we realize we need them
self.inputInfo['ProbabilityWeight'] = np.atleast_1d(self.inputInfo['ProbabilityWeight'])
return 0,oldInput
#otherwise, return the restart point
else:
Expand Down
26 changes: 25 additions & 1 deletion framework/Steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import abc
import os
import sys
import itertools
if sys.version_info.major > 2:
import pickle
else:
Expand All @@ -45,7 +46,7 @@
from utils import InputData
import Models
from OutStreams import OutStreamManager
from DataObjects import Data
from DataObjects import Data,DataSet
#Internal Modules End--------------------------------------------------------------------------------


Expand Down Expand Up @@ -401,6 +402,28 @@ def _localInitializeStep(self,inDictionary):
inDictionary['Output'][i].initialize(inDictionary)

self.raiseADebug('for the role Output the item of class {0:15} and name {1:15} has been initialized'.format(inDictionary['Output'][i].type,inDictionary['Output'][i].name))
self._registerMetadata(inDictionary)

def _registerMetadata(self,inDictionary):
"""
collects expected metadata keys and deliver them to output data objects
@ In, inDictionary, dict, initialization dictionary
@ Out, None
"""
## first collect them
metaKeys = set()
for role,entities in inDictionary.items():
if isinstance(entities,list):
for entity in entities:
if hasattr(entity,'provideExpectedMetaKeys'):
metaKeys = metaKeys.union(entity.provideExpectedMetaKeys())
else:
if hasattr(entities,'provideExpectedMetaKeys'):
metaKeys = metaKeys.union(entities.provideExpectedMetaKeys())
## then give them to the output data objects
for out in inDictionary['Output']:
if isinstance(out,DataSet):
out.addExpectedMeta(metaKeys)

def _localTakeAstepRun(self,inDictionary):
"""
Expand Down Expand Up @@ -569,6 +592,7 @@ def _localInitializeStep(self,inDictionary):
self.raiseADebug('Submitted input '+str(inputIndex+1))
except utils.NoMoreSamplesNeeded:
self.raiseAMessage('Sampler returned "NoMoreSamplesNeeded". Continuing...')
self._registerMetadata(inDictionary)

def _localTakeAstepRun(self,inDictionary):
"""
Expand Down