Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Global TSA via ROMCollection #2189

Merged
merged 11 commits into from
Oct 5, 2023
5 changes: 3 additions & 2 deletions ravenframework/SupervisedLearning/SupervisedLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,13 +753,14 @@ def adjustLocalRomSegment(self, settings, picker):
# by default, do nothing
pass

def finalizeLocalRomSegmentEvaluation(self, settings, evaluation, picker):
def finalizeLocalRomSegmentEvaluation(self, settings, evaluation, globalPicker, localPicker=None):
  """
    Allows global settings in "settings" to affect a LOCAL evaluation of a LOCAL ROM
    Note this is called on the LOCAL subsegment ROM and not the GLOBAL templateROM.
    Base-class implementation is a deliberate no-op: subclasses override this to adjust
    the local evaluation; here the evaluation is returned unchanged.
    @ In, settings, dict, as from getGlobalRomSegmentSettings
    @ In, evaluation, dict, preliminary evaluation from the local segment ROM as {target: [values]}
    @ In, globalPicker, slice, indexer for data range of this segment FROM GLOBAL SIGNAL
    @ In, localPicker, slice, optional, indexer for part of signal that should be adjusted IN LOCAL SIGNAL
    @ Out, evaluation, dict, {target: np.ndarray} adjusted global evaluation
  """
  return evaluation
Expand Down
52 changes: 52 additions & 0 deletions ravenframework/SupervisedLearning/SyntheticHistory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"""
import numpy as np
import collections
import copy

from ..utils import InputData, xmlUtils
from ..TSA import TSAUser
Expand Down Expand Up @@ -78,6 +79,8 @@ def _handleInput(self, paramInput):
"""
SupervisedLearning._handleInput(self, paramInput)
self.readTSAInput(paramInput)
if len(self._tsaAlgorithms)==0:
self.raiseAWarning("No Segmenting algorithms were requested.")

def _train(self, featureVals, targetVals):
"""
Expand All @@ -97,6 +100,55 @@ def __evaluateLocal__(self, featureVals):
rlz = self.evaluateTSASequential()
return rlz


def getGlobalRomSegmentSettings(self, trainingDict, divisions):
  """
    Allows the ROM to perform some analysis before segmenting.
    Note this is called on the GLOBAL templateROM from the ROMcollection, NOT on the LOCAL subsegment ROMs!
    @ In, trainingDict, dict, data for training, full and unsegmented
    @ In, divisions, tuple, (division slice indices, unclustered spaces)
    @ Out, settings, object, arbitrary information about ROM clustering settings
    @ Out, trainingDict, dict, adjusted training data (possibly unchanged)
  """
  self.raiseADebug('Training Global...')
  # work on a copy so the caller's training data is never modified in place
  trainingDict = copy.deepcopy(trainingDict)
  availableNames = list(trainingDict.keys())
  availableValues = list(trainingDict.values())
  # gather the signal for each requested target, keeping targets together
  ## This is for handling the special case needed by skl *MultiTask* that
  ## requires multiple targets.
  foundNames = []
  foundValues = []
  for target in self.target:
    if target not in availableNames:
      self.raiseAnError(IOError,'The target '+target+' is not in the training set')
    foundValues.append(availableValues[availableNames.index(target)])
    foundNames.append(target)
  # stack targets into one array, targets along the trailing axis
  stackedValues = np.stack(foundValues, axis=-1)
  # train only the algorithms flagged as global
  self.trainTSASequential(stackedValues, trainGlobal=True)
  settings = self.getGlobalTSARomSettings()
  # write the (possibly adjusted) signals back into the training data
  for index, target in enumerate(foundNames):
    trainingDict[target] = stackedValues[:, :, index]
  return settings, trainingDict

def finalizeGlobalRomSegmentEvaluation(self, settings, evaluation, weights, slicer):
  """
    Allows any global settings to be applied to the signal collected by the ROMCollection instance.
    Note this is called on the GLOBAL templateROM from the ROMcollection, NOT on the LOCAL subspace segment ROMs!
    @ In, settings, dict, as from getGlobalRomSegmentSettings
    @ In, evaluation, dict, {target: np.ndarray} evaluated full (global) signal from ROMCollection
    @ In, weights, np.array(float), optional, if included then gives weight to histories for CDF preservation
    @ In, slicer, slice, indexer for data range of this segment FROM GLOBAL SIGNAL
    @ Out, evaluation, dict, {target: np.ndarray} adjusted global evaluation
  """
  # if no global algorithms were requested, the evaluation passes through unchanged
  if self._tsaGlobalAlgorithms:
    rlz = self.evaluateTSASequential(evalGlobal=True, evaluation=evaluation, slicer=slicer)
    # overwrite evaluated targets with their globally-adjusted counterparts
    evaluation.update(rlz)
  return evaluation

def writePointwiseData(self, writeTo):
"""
Writes pointwise data about this ROM to the data object.
Expand Down
22 changes: 13 additions & 9 deletions ravenframework/TSA/ARMA.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,11 @@ def fit(self, signal, pivot, targets, settings):
'ma': res.polynomial_ma[1:], # MA
'var': res.params[res.param_names.index('sigma2')], # variance
'initials': initDist, # characteristics for sampling initial states
'model': model}
'lags': [P,d,Q],
'model': {'obs_cov': model['obs_cov'],
'state_cov': model['state_cov']}, }
if not settings['reduce_memory']:
params[target]['arma']['results'] = res
params[target]['arma']['residual'] = res.resid
return params

def getResidual(self, initial, params, pivot, settings):
Expand All @@ -226,7 +228,7 @@ def getResidual(self, initial, params, pivot, settings):

residual = initial.copy()
for t, (target, data) in enumerate(params.items()):
residual[:, t] = data['arma']['results'].resid
residual[:, t] = data['arma']['residual']

return residual

Expand Down Expand Up @@ -293,16 +295,18 @@ def generate(self, params, pivot, settings):
synthetic = np.zeros((len(pivot), len(params)))
for t, (target, data) in enumerate(params.items()):
armaData = data['arma']
P,d,Q = armaData['lags']
modelParams = np.r_[armaData.get('const', 0), armaData['ar'], armaData['ma'], armaData.get('var', 1)]
msrShocks, stateShocks, initialState = self._generateNoise(armaData, synthetic.shape[0])
# measurement shocks
# statsmodels if we don't provide them.
import statsmodels.api
model = statsmodels.tsa.arima.model.ARIMA(synthetic[:,t], order=(P, d, Q), trend='c')
# produce sample
new = armaData['model'].simulate(modelParams,
synthetic.shape[0],
measurement_shocks=msrShocks,
state_shocks=stateShocks,
initial_state=initialState)
new = model.simulate(modelParams,
synthetic.shape[0],
measurement_shocks=msrShocks,
state_shocks=stateShocks,
initial_state=initialState)
if settings.get('gaussianize', True):
# back-transform through CDF
new = mathUtils.degaussianize(new, params[target]['cdf'])
Expand Down
57 changes: 48 additions & 9 deletions ravenframework/TSA/TSAUser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(self):
self._tsaAlgoSettings = {} # initialization settings for each algorithm
self._tsaTrainedParams = {} # holds results of training each algorithm
self._tsaAlgorithms = [] # list and order for TSA algorithms to use
self._tsaGlobalAlgorithms = [] # list and order for global TSA algorithms to use
self.pivotParameterID = None # string name for time-like pivot parameter # TODO base class?
self.pivotParameterValues = None # values for the time-like pivot parameter # TODO base class?
self._paramNames = None # cached list of parameter names
Expand All @@ -83,7 +84,10 @@ def readTSAInput(self, spec):
if sub.name in factory.knownTypes():
algo = factory.returnInstance(sub.name)
self._tsaAlgoSettings[algo] = algo.handleInput(sub)
self._tsaAlgorithms.append(algo)
if self._tsaAlgoSettings[algo]['global']:
self._tsaGlobalAlgorithms.append(algo)
else:
self._tsaAlgorithms.append(algo)
foundTSAType = True
if foundTSAType is False:
options = ', '.join(factory.knownTypes())
Expand Down Expand Up @@ -176,11 +180,12 @@ def getParamsAsVars(self):
self._paramRealization = rlz
return self._paramRealization

def trainTSASequential(self, targetVals):
def trainTSASequential(self, targetVals, trainGlobal=False):
"""
Train TSA algorithms using a sequential removal-and-residual approach.
@ In, targetVals, array, shape = [n_timeStep, n_dimensions], array of time series data
NOTE: this should be a single history/realization, not an array of realizations
@ In, trainGlobal, bool, are we training on global signal?
@ Out, None
"""
pivotName = self.pivotParameterID
Expand All @@ -190,8 +195,12 @@ def trainTSASequential(self, targetVals):
pivots = targetVals[0, :, pivotIndex]
self.pivotParameterValues = pivots[:] # TODO any way to avoid storing these?

residual = targetVals[:, :, :] # deep-ish copy, so we don't mod originals
for a, algo in enumerate(self._tsaAlgorithms):
# if NOT training globally, deep-ish copy, so we don't mod originals
residual = targetVals if trainGlobal else targetVals[:, :, :]
# check if training globally, if so we only train global algos
algorithms = self._tsaGlobalAlgorithms if trainGlobal else self._tsaAlgorithms

for a, algo in enumerate(algorithms):
settings = self._tsaAlgoSettings[algo]
targets = settings['target']
indices = tuple(self.target.index(t) for t in targets)
Expand All @@ -211,10 +220,12 @@ def trainTSASequential(self, targetVals):
residual[0, :, indices] = algoResidual.T # transpose, again because of indices
# TODO meta store signal, residual?

def evaluateTSASequential(self):
def evaluateTSASequential(self, evalGlobal=False, evaluation=None, slicer=None):
"""
Evaluate TSA algorithms using a sequential linear superposition approach
@ In, None
@ In, evalGlobal, bool, are these algos trained on global signal?
@ In, evaluation, dict, realization dictionary of values for each target
@ In, slicer, list of slice, indexer for data range of this segment FROM GLOBAL SIGNAL
@ Out, rlz, dict, realization dictionary of values for each target
"""
pivots = self.pivotParameterValues
Expand All @@ -223,8 +234,21 @@ def evaluateTSASequential(self):
# that ignores the pivotParameter on which to index the results variables
noPivotTargets = [x for x in self.target if x != self.pivotParameterID]
result = np.zeros((self.pivotParameterValues.size, len(noPivotTargets)))
needToRecombine = False

for algo in self._tsaAlgorithms[::-1]:
# check if training globally, if so we only apply global algos to given realizations
if evalGlobal:
algorithms = self._tsaGlobalAlgorithms[::-1]
if slicer:
needToRecombine = True
for i,s in enumerate(slicer):
result[s] += np.array([evaluation[target][i].tolist() for target in noPivotTargets]).T
else:
result += np.array([evaluation[target].tolist() for target in noPivotTargets]).T
else:
algorithms = self._tsaAlgorithms[::-1]

for algo in algorithms:
settings = self._tsaAlgoSettings[algo]
targets = settings['target']
indices = tuple(noPivotTargets.index(t) for t in targets)
Expand All @@ -237,11 +261,26 @@ def evaluateTSASequential(self):
else: # Must be exclusively a TimeSeriesCharacterizer, so there is nothing to evaluate
continue
# RAVEN realization construction
rlz = dict((target, result[:, t]) for t, target in enumerate(noPivotTargets))
rlz[self.pivotParameterID] = self.pivotParameterValues
if needToRecombine:
rlz = dict((target, np.vstack([[result[s, t]] for s in slicer])) for t, target in enumerate(noPivotTargets))
rlz[self.pivotParameterID] = evaluation[self.pivotParameterID]
else:
rlz = dict((target, result[:, t]) for t, target in enumerate(noPivotTargets))
rlz[self.pivotParameterID] = self.pivotParameterValues

return rlz

def getGlobalTSARomSettings(self):
  """
    Collects the trained parameters of the globally-applied TSA algorithms.
    Note the original docstring ("Train TSA algorithms using a sequential
    removal-and-residual approach") was copy-pasted from trainTSASequential and
    did not describe this method; this method trains nothing.
    @ In, None
    @ Out, globalSettings, dict, mapping of each global TSA algorithm to its trained parameters
  """
  # assumes trainTSASequential(..., trainGlobal=True) has already populated
  # self._tsaTrainedParams for every global algorithm
  return {algo: self._tsaTrainedParams[algo] for algo in self._tsaGlobalAlgorithms}

def writeTSAtoXML(self, xml):
"""
Write properties of TSA algorithms to XML
Expand Down
10 changes: 9 additions & 1 deletion ravenframework/TSA/TimeSeriesAnalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ def getInputSpecification(cls):
descr=r"""indicates the variables for which this algorithm will be used for characterization. """)
specs.addParam('seed', param_type=InputTypes.IntegerType, required=False,
descr=r"""sets a seed for the underlying random number generator, if present.""")
specs.addParam('global', param_type=InputTypes.BoolType, required=False,
descr=r"""designates this algorithm to be used on full signal instead of per
segment. NOTE: because this is intended to be used when some algorithms are
applied segment-wise and others are applied globally, this is meant to be an
advanced feature and it is important to be mindful of the segments lengths.
E.g., some Fourier periods may be longer than the intended segment length, in
which case the this 'global' parameter should be set to True for better
fitting.""", default=False)
return specs

@classmethod
Expand Down Expand Up @@ -106,7 +114,7 @@ def handleInput(self, spec):
settings = {}
settings['target'] = spec.parameterValues['target']
settings['seed'] = spec.parameterValues.get('seed', None)

settings['global'] = spec.parameterValues.get('global', False)
settings = self.setDefaults(settings)

return settings
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
scaling,filename
1,GlobalFourierARMA_A_0.csv
Loading