Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Global TSA via ROMCollection #2189

Merged
merged 11 commits into from
Oct 5, 2023
5 changes: 3 additions & 2 deletions ravenframework/SupervisedLearning/SupervisedLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,13 +753,14 @@ def adjustLocalRomSegment(self, settings, picker):
# by default, do nothing
pass

def finalizeLocalRomSegmentEvaluation(self, settings, evaluation, picker):
def finalizeLocalRomSegmentEvaluation(self, settings, evaluation, globalPicker, localPicker=None):
  """
    Allows global settings in "settings" to affect a LOCAL evaluation of a LOCAL ROM
    Note this is called on the LOCAL subsegment ROM and not the GLOBAL templateROM.
    Base-class implementation is a deliberate no-op: subclasses override this to adjust
    the local evaluation; here the evaluation is returned unchanged.
    @ In, settings, dict, as from getGlobalRomSegmentSettings
    @ In, evaluation, dict, preliminary evaluation from the local segment ROM as {target: [values]}
    @ In, globalPicker, slice, indexer for data range of this segment FROM GLOBAL SIGNAL
    @ In, localPicker, slice, optional, indexer for part of signal that should be adjusted IN LOCAL SIGNAL
    @ Out, evaluation, dict, {target: np.ndarray} adjusted global evaluation
  """
  return evaluation
Expand Down
52 changes: 52 additions & 0 deletions ravenframework/SupervisedLearning/SyntheticHistory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"""
import numpy as np
import collections
import copy

from ..utils import InputData, xmlUtils
from ..TSA import TSAUser
Expand Down Expand Up @@ -78,6 +79,8 @@ def _handleInput(self, paramInput):
"""
SupervisedLearning._handleInput(self, paramInput)
self.readTSAInput(paramInput)
if len(self._tsaAlgorithms)==0:
self.raiseAWarning("No Segmenting algorithms were requested.")

def _train(self, featureVals, targetVals):
"""
Expand All @@ -97,6 +100,55 @@ def __evaluateLocal__(self, featureVals):
rlz = self.evaluateTSASequential()
return rlz


def getGlobalRomSegmentSettings(self, trainingDict, divisions):
  """
    Allows the ROM to perform some analysis before segmenting.
    Note this is called on the GLOBAL templateROM from the ROMcollection, NOT on the LOCAL subsegment ROMs!
    @ In, trainingDict, dict, data for training, full and unsegmented
    @ In, divisions, tuple, (division slice indices, unclustered spaces)
    @ Out, settings, object, arbitrary information about ROM clustering settings
    @ Out, trainingDict, dict, adjusted training data (possibly unchanged)
  """
  self.raiseADebug('Training Global...')
  # work on a copy so the caller's training data is never modified in place
  trainingDict = copy.deepcopy(trainingDict)
  availableNames = list(trainingDict.keys())
  availableValues = list(trainingDict.values())
  # gather the signal for each requested target, keeping targets together
  ## This is for handling the special case needed by skl *MultiTask* that
  ## requires multiple targets.
  foundNames = []
  foundValues = []
  for target in self.target:
    if target not in availableNames:
      self.raiseAnError(IOError,'The target '+target+' is not in the training set')
    foundValues.append(availableValues[availableNames.index(target)])
    foundNames.append(target)
  # stack targets into one array, targets along the trailing axis
  stackedValues = np.stack(foundValues, axis=-1)
  # train only the algorithms flagged as global
  self.trainTSASequential(stackedValues, trainGlobal=True)
  settings = self.getGlobalTSARomSettings()
  # write the (possibly adjusted) signals back into the training data
  for index, target in enumerate(foundNames):
    trainingDict[target] = stackedValues[:, :, index]
  return settings, trainingDict

def finalizeGlobalRomSegmentEvaluation(self, settings, evaluation, weights, slicer):
  """
    Allows any global settings to be applied to the signal collected by the ROMCollection instance.
    Note this is called on the GLOBAL templateROM from the ROMcollection, NOT on the LOCAL subspace segment ROMs!
    @ In, settings, dict, as from getGlobalRomSegmentSettings
    @ In, evaluation, dict, {target: np.ndarray} evaluated full (global) signal from ROMCollection
    @ In, weights, np.array(float), optional, if included then gives weight to histories for CDF preservation
    @ In, slicer, slice, indexer for data range of this segment FROM GLOBAL SIGNAL
    @ Out, evaluation, dict, {target: np.ndarray} adjusted global evaluation
  """
  # if no global algorithms were requested, the evaluation passes through unchanged
  if self._tsaGlobalAlgorithms:
    rlz = self.evaluateTSASequential(evalGlobal=True, evaluation=evaluation, slicer=slicer)
    # overwrite evaluated targets with their globally-adjusted counterparts
    evaluation.update(rlz)
  return evaluation

def writePointwiseData(self, writeTo):
"""
Writes pointwise data about this ROM to the data object.
Expand Down
22 changes: 13 additions & 9 deletions ravenframework/TSA/ARMA.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,11 @@ def fit(self, signal, pivot, targets, settings):
'ma': res.polynomial_ma[1:], # MA
'var': res.params[res.param_names.index('sigma2')], # variance
'initials': initDist, # characteristics for sampling initial states
'model': model}
'lags': [P,d,Q],
'model': {'obs_cov': model['obs_cov'],
'state_cov': model['state_cov']}, }
if not settings['reduce_memory']:
params[target]['arma']['results'] = res
params[target]['arma']['residual'] = res.resid
return params

def getResidual(self, initial, params, pivot, settings):
Expand All @@ -226,7 +228,7 @@ def getResidual(self, initial, params, pivot, settings):

residual = initial.copy()
for t, (target, data) in enumerate(params.items()):
residual[:, t] = data['arma']['results'].resid
residual[:, t] = data['arma']['residual']

return residual

Expand Down Expand Up @@ -293,16 +295,18 @@ def generate(self, params, pivot, settings):
synthetic = np.zeros((len(pivot), len(params)))
for t, (target, data) in enumerate(params.items()):
armaData = data['arma']
P,d,Q = armaData['lags']
modelParams = np.r_[armaData.get('const', 0), armaData['ar'], armaData['ma'], armaData.get('var', 1)]
msrShocks, stateShocks, initialState = self._generateNoise(armaData, synthetic.shape[0])
# measurement shocks
# statsmodels if we don't provide them.
import statsmodels.api
model = statsmodels.tsa.arima.model.ARIMA(synthetic[:,t], order=(P, d, Q), trend='c')
# produce sample
new = armaData['model'].simulate(modelParams,
synthetic.shape[0],
measurement_shocks=msrShocks,
state_shocks=stateShocks,
initial_state=initialState)
new = model.simulate(modelParams,
synthetic.shape[0],
measurement_shocks=msrShocks,
state_shocks=stateShocks,
initial_state=initialState)
if settings.get('gaussianize', True):
# back-transform through CDF
new = mathUtils.degaussianize(new, params[target]['cdf'])
Expand Down
57 changes: 48 additions & 9 deletions ravenframework/TSA/TSAUser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(self):
self._tsaAlgoSettings = {} # initialization settings for each algorithm
self._tsaTrainedParams = {} # holds results of training each algorithm
self._tsaAlgorithms = [] # list and order for TSA algorithms to use
self._tsaGlobalAlgorithms = [] # list and order for global TSA algorithms to use
self.pivotParameterID = None # string name for time-like pivot parameter # TODO base class?
self.pivotParameterValues = None # values for the time-like pivot parameter # TODO base class?
self._paramNames = None # cached list of parameter names
Expand All @@ -83,7 +84,10 @@ def readTSAInput(self, spec):
if sub.name in factory.knownTypes():
algo = factory.returnInstance(sub.name)
self._tsaAlgoSettings[algo] = algo.handleInput(sub)
self._tsaAlgorithms.append(algo)
if self._tsaAlgoSettings[algo]['global']:
self._tsaGlobalAlgorithms.append(algo)
else:
self._tsaAlgorithms.append(algo)
foundTSAType = True
if foundTSAType is False:
options = ', '.join(factory.knownTypes())
Expand Down Expand Up @@ -176,11 +180,12 @@ def getParamsAsVars(self):
self._paramRealization = rlz
return self._paramRealization

def trainTSASequential(self, targetVals):
def trainTSASequential(self, targetVals, trainGlobal=False):
"""
Train TSA algorithms using a sequential removal-and-residual approach.
@ In, targetVals, array, shape = [n_timeStep, n_dimensions], array of time series data
NOTE: this should be a single history/realization, not an array of realizations
@ In, trainGlobal, bool, are we training on global signal?
@ Out, None
"""
pivotName = self.pivotParameterID
Expand All @@ -190,8 +195,12 @@ def trainTSASequential(self, targetVals):
pivots = targetVals[0, :, pivotIndex]
self.pivotParameterValues = pivots[:] # TODO any way to avoid storing these?

residual = targetVals[:, :, :] # deep-ish copy, so we don't mod originals
for a, algo in enumerate(self._tsaAlgorithms):
# if NOT training globally, deep-ish copy, so we don't mod originals
residual = targetVals if trainGlobal else targetVals[:, :, :]
# check if training globally, if so we only train global algos
algorithms = self._tsaGlobalAlgorithms if trainGlobal else self._tsaAlgorithms

for a, algo in enumerate(algorithms):
settings = self._tsaAlgoSettings[algo]
targets = settings['target']
indices = tuple(self.target.index(t) for t in targets)
Expand All @@ -211,10 +220,12 @@ def trainTSASequential(self, targetVals):
residual[0, :, indices] = algoResidual.T # transpose, again because of indices
# TODO meta store signal, residual?

def evaluateTSASequential(self):
def evaluateTSASequential(self, evalGlobal=False, evaluation=None, slicer=None):
"""
Evaluate TSA algorithms using a sequential linear superposition approach
@ In, None
@ In, evalGlobal, bool, are these algos trained on global signal?
@ In, evaluation, dict, realization dictionary of values for each target
@ In, slicer, list of slice, indexer for data range of this segment FROM GLOBAL SIGNAL
@ Out, rlz, dict, realization dictionary of values for each target
"""
pivots = self.pivotParameterValues
Expand All @@ -223,8 +234,21 @@ def evaluateTSASequential(self):
# that ignores the pivotParameter on which to index the results variables
noPivotTargets = [x for x in self.target if x != self.pivotParameterID]
result = np.zeros((self.pivotParameterValues.size, len(noPivotTargets)))
needToRecombine = False

for algo in self._tsaAlgorithms[::-1]:
# check if training globally, if so we only apply global algos to given realizations
if evalGlobal:
algorithms = self._tsaGlobalAlgorithms[::-1]
if slicer:
needToRecombine = True
for i,s in enumerate(slicer):
result[s] += np.array([evaluation[target][i].tolist() for target in noPivotTargets]).T
else:
result += np.array([evaluation[target].tolist() for target in noPivotTargets]).T
else:
algorithms = self._tsaAlgorithms[::-1]

for algo in algorithms:
settings = self._tsaAlgoSettings[algo]
targets = settings['target']
indices = tuple(noPivotTargets.index(t) for t in targets)
Expand All @@ -237,11 +261,26 @@ def evaluateTSASequential(self):
else: # Must be exclusively a TimeSeriesCharacterizer, so there is nothing to evaluate
continue
# RAVEN realization construction
rlz = dict((target, result[:, t]) for t, target in enumerate(noPivotTargets))
rlz[self.pivotParameterID] = self.pivotParameterValues
if needToRecombine:
rlz = dict((target, np.vstack([[result[s, t]] for s in slicer])) for t, target in enumerate(noPivotTargets))
rlz[self.pivotParameterID] = evaluation[self.pivotParameterID]
else:
rlz = dict((target, result[:, t]) for t, target in enumerate(noPivotTargets))
rlz[self.pivotParameterID] = self.pivotParameterValues

return rlz

def getGlobalTSARomSettings(self):
  """
    Collects the trained parameters of the globally-applied TSA algorithms.
    Note the original docstring ("Train TSA algorithms using a sequential
    removal-and-residual approach") was copy-pasted from trainTSASequential and
    did not describe this method; this method trains nothing.
    @ In, None
    @ Out, globalSettings, dict, mapping of each global TSA algorithm to its trained parameters
  """
  # assumes trainTSASequential(..., trainGlobal=True) has already populated
  # self._tsaTrainedParams for every global algorithm
  return {algo: self._tsaTrainedParams[algo] for algo in self._tsaGlobalAlgorithms}

def writeTSAtoXML(self, xml):
"""
Write properties of TSA algorithms to XML
Expand Down
10 changes: 9 additions & 1 deletion ravenframework/TSA/TimeSeriesAnalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ def getInputSpecification(cls):
descr=r"""indicates the variables for which this algorithm will be used for characterization. """)
specs.addParam('seed', param_type=InputTypes.IntegerType, required=False,
descr=r"""sets a seed for the underlying random number generator, if present.""")
specs.addParam('global', param_type=InputTypes.BoolType, required=False,
descr=r"""designates this algorithm to be used on full signal instead of per
segment. NOTE: because this is intended to be used when some algorithms are
applied segment-wise and others are applied globally, this is meant to be an
advanced feature and it is important to be mindful of the segments lengths.
E.g., some Fourier periods may be longer than the intended segment length, in
which case the this 'global' parameter should be set to True for better
fitting.""", default=False)
return specs

@classmethod
Expand Down Expand Up @@ -106,7 +114,7 @@ def handleInput(self, spec):
settings = {}
settings['target'] = spec.parameterValues['target']
settings['seed'] = spec.parameterValues.get('seed', None)

settings['global'] = spec.parameterValues.get('global', False)
settings = self.setDefaults(settings)

return settings
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
scaling,filename
1,GlobalFourierARMA_A_0.csv
Loading