[WIP] Jimmy testing representativity based on changes in basic stat #1790

Closed

Changes from 12 commits (22 commits total)
1 change: 1 addition & 0 deletions framework/Metrics/metrics/Factory.py
@@ -25,6 +25,7 @@
from .CDFAreaDifference import CDFAreaDifference
from .PDFCommonArea import PDFCommonArea
from .ScipyMetric import ScipyMetric
from .RepresentativityFactors import RepresentativityFactors

factory = EntityFactory('Metrics')
factory.registerAllSubtypes(MetricInterface)
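For orientation, a hedged sketch of what the registration above enables: once RepresentativityFactors is imported, registerAllSubtypes discovers it as a MetricInterface subclass and makes it constructible by name. The returnInstance call is an assumption about the EntityFactory API, not something shown in this diff.

```python
# Assumed EntityFactory usage; 'returnInstance' is hypothetical here,
# inferred from the registration pattern above.
metric = factory.returnInstance('RepresentativityFactors')
```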
113 changes: 113 additions & 0 deletions framework/Metrics/metrics/RepresentativityFactors.py
@@ -0,0 +1,113 @@
# Copyright 2017 Battelle Energy Alliance, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Created on April 29 2021

@author: Mohammad Abdo (@Jimmy-INL)
"""
#for future compatibility with Python 3--------------------------------------------------------------
from __future__ import division, print_function, unicode_literals, absolute_import
#End compatibility block for Python 3----------------------------------------------------------------

#External Modules------------------------------------------------------------------------------------
import numpy as np
import copy
#import scipy.spatial.distance as spatialDistance
#External Modules End--------------------------------------------------------------------------------

#Internal Modules------------------------------------------------------------------------------------
from .MetricInterface import MetricInterface
# from Metrics.metrics import MetricUtilities
from utils import InputData, InputTypes
Collaborator: use new ravenframework import strategy to import the modules

Collaborator Author: @joshua-cogliati-inl, could you direct me to the new import strategy?

#Internal Modules End--------------------------------------------------------------------------------

class RepresentativityFactors(MetricInterface):
"""
RepresentativityFactors is the metric class used to quantitatively
assess the representativity of a mock experiment with respect to the target plant.
"""
availScaling = {}

@classmethod
def getInputSpecification(cls):
"""
Method to get a reference to a class that specifies the input data for
class cls.
@ In, cls, the class for which we are retrieving the specification
@ Out, inputSpecification, InputData.ParameterInput, class to use for
specifying input of cls.
"""
inputSpecification = super(RepresentativityFactors, cls).getInputSpecification()
actionTypeInput = InputData.parameterInputFactory("actionType", contentType=InputTypes.StringType)
inputSpecification.addSub(actionTypeInput)

return inputSpecification
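A minimal hedged sketch of how this specification might be exercised, mirroring _localReadMoreXML below; the XML fragment, tag, and attribute names are illustrative assumptions, not confirmed by this diff.

```python
import xml.etree.ElementTree as ET

# Hypothetical user input for this metric; tag and attribute names are assumed.
xmlNode = ET.fromstring(
    "<RepresentativityFactors name='repFactor'>"
    "<actionType>someAction</actionType>"
    "</RepresentativityFactors>")

spec = RepresentativityFactors.getInputSpecification()()
spec.parseNode(xmlNode)
for child in spec.subparts:
    if child.getName() == 'actionType':
        print(child.value)  # -> 'someAction'
```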

def __init__(self):
"""
Constructor
@ In, None
@ Out, None
"""
# Metric.__init__(self)
super().__init__()
# The type of given analysis
self.actionType = None
# True indicates the metric needs to be able to handle dynamic data
self._dynamicHandling = True
# True indicates the metric needs to be able to handle pairwise data
self._pairwiseHandling = False

def _localReadMoreXML(self, xmlNode):
"""
Method that reads the portion of the xml input that belongs to this specialized class
and initialize internal parameters
@ In, xmlNode, xml.etree.Element, Xml element node
@ Out, None
"""
paramInput = self.getInputSpecification()()
paramInput.parseNode(xmlNode)
for child in paramInput.subparts:
if child.getName() == "actionType":
self.actionType = child.value
else:
self.raiseAnError(IOError, "Unknown xml node ", child.getName(), " is provided for metric system")

def run(self, x, y, weights = None, axis = 0, **kwargs):
"""
This method computes the representativity factor between two inputs x and y based on the given sensitivities and parameter covariance
@ In, x, numpy.ndarray, array containing data of x, if 1D array is provided,
the array will be reshaped via x.reshape(-1,1), shape (n_samples, ), if 2D
array is provided, shape (n_samples, n_time_steps)
@ In, y, numpy.ndarray, array containing data of y, if 1D array is provided,
the array will be reshaped via y.reshape(-1,1), shape (n_samples, ), if 2D
array is provided, shape (n_samples, n_time_steps)
@ In, weights, array_like (numpy.array or list), optional, weights associated
with input, shape (n_samples) if axis = 0, otherwise shape (n_time_steps)
@ In, axis, integer, optional, axis along which a metric is performed, default is 0,
i.e. the metric will be performed along the first dimension (the "rows").
If metric postprocessor is used, the first dimension is the RAVEN_sample_ID,
and the second dimension is the pivotParameter if HistorySet is provided.
@ In, kwargs, dict, dictionary of parameters characteristic of each metric
@ Out, value, float, metric result
"""
# assert (isinstance(x, np.ndarray))
# assert (isinstance(y, np.ndarray))
senMeasurables = kwargs['senMeasurables']
senFOMs = kwargs['senFOMs']
covParameters = kwargs['covParameters']
r = (senFOMs.T @ covParameters @ senMeasurables)/\
np.sqrt(senFOMs.T @ covParameters @ senFOMs)/\
np.sqrt(senMeasurables.T @ covParameters @ senMeasurables)
Collaborator (on lines +83 to +88): Why do you need to create a metric class to do this calculation? It seems to me this calculation is only specific to representativity. How can we use it for other applications? If not, I would suggest moving it to the representativity PP.

return r
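For readers unfamiliar with the quantity computed in run(), a self-contained sketch with made-up sensitivity vectors and parameter covariance (all values are illustrative, not from any test case):

```python
import numpy as np

# Hypothetical sensitivities of the mock experiment's measurables and of the
# target plant's figures of merit (FOMs) to three shared parameters.
senMeasurables = np.array([0.8, 0.1, 0.3])   # assumed values
senFOMs = np.array([0.7, 0.2, 0.4])          # assumed values
covParameters = np.diag([0.04, 0.01, 0.09])  # assumed parameter covariance

# Same expression as in run():
# r = (S_F^T C S_M) / sqrt(S_F^T C S_F) / sqrt(S_M^T C S_M)
r = (senFOMs.T @ covParameters @ senMeasurables) \
    / np.sqrt(senFOMs.T @ covParameters @ senFOMs) \
    / np.sqrt(senMeasurables.T @ covParameters @ senMeasurables)
print(r)  # ~0.98 for these made-up numbers
```

Structurally this is the cosine of the angle between the two sensitivity vectors under the inner product induced by the parameter covariance, so r lies in [-1, 1], with values near 1 indicating a highly representative experiment.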
79 changes: 28 additions & 51 deletions framework/Models/PostProcessors/BasicStatistics.py
@@ -27,14 +27,14 @@
#External Modules End-----------------------------------------------------------

#Internal Modules---------------------------------------------------------------
from .PostProcessorInterface import PostProcessorInterface
from .PostProcessorReadyInterface import PostProcessorReadyInterface
from utils import utils
from utils import InputData, InputTypes
from utils import mathUtils
import Files
#Internal Modules End-----------------------------------------------------------

class BasicStatistics(PostProcessorInterface):
class BasicStatistics(PostProcessorReadyInterface):
"""
BasicStatistics filter class. It computes all the most popular statistics
"""
@@ -142,6 +142,7 @@ def __init__(self):
self.sampleSize = None # number of sample size
self.calculations = {}
self.validDataType = ['PointSet', 'HistorySet', 'DataSet'] # The list of accepted types of DataObject
self.setInputDataType('xrDataset')

def inputToInternal(self, currentInp):
Collaborator: We may need to restructure this method.

"""
@@ -150,58 +151,20 @@
@ In, currentInp, object, an object that needs to be converted
@ Out, (inputDataset, pbWeights), tuple, the dataset of inputs and the corresponding variable probability weight
"""
# The BasicStatistics postprocessor only accepts DataObjects
self.dynamic = False
# The BasicStatistics postprocessor only accepts Datasets
currentInput = currentInp[-1] if type(currentInp) == list else currentInp
if len(currentInput) == 0:
self.raiseAnError(IOError, "In post-processor " +self.name+" the input "+currentInput.name+" is empty.")

pbWeights = None
if type(currentInput).__name__ == 'tuple':
return currentInput
# TODO: convert dict to dataset, I think this will be removed when DataSet is used by other entities that
# are currently using this Basic Statistics PostProcessor.
if type(currentInput).__name__ == 'dict':
if 'targets' not in currentInput.keys():
self.raiseAnError(IOError, 'Did not find targets in the input dictionary')
inputDataset = xr.Dataset()
for var, val in currentInput['targets'].items():
inputDataset[var] = val
if 'metadata' in currentInput.keys():
metadata = currentInput['metadata']
self.pbPresent = True if 'ProbabilityWeight' in metadata else False
if self.pbPresent:
pbWeights = xr.Dataset()
self.realizationWeight = xr.Dataset()
self.realizationWeight['ProbabilityWeight'] = metadata['ProbabilityWeight']/metadata['ProbabilityWeight'].sum()
for target in self.parameters['targets']:
pbName = 'ProbabilityWeight-' + target
if pbName in metadata:
pbWeights[target] = metadata[pbName]/metadata[pbName].sum()
elif self.pbPresent:
pbWeights[target] = self.realizationWeight['ProbabilityWeight']
else:
self.raiseAWarning('BasicStatistics postprocessor did not detect ProbabilityWeights! Assuming unit weights instead...')
else:
self.raiseAWarning('BasicStatistics postprocessor did not detect ProbabilityWeights! Assuming unit weights instead...')
if 'RAVEN_sample_ID' not in inputDataset.sizes.keys():
self.raiseAWarning('BasicStatistics postprocessor did not detect RAVEN_sample_ID! Assuming the first dimension of given data...')
self.sampleTag = utils.first(inputDataset.sizes.keys())
return inputDataset, pbWeights

if currentInput.type not in ['PointSet','HistorySet']:
self.raiseAnError(IOError, self, 'BasicStatistics postprocessor accepts PointSet and HistorySet only! Got ' + currentInput.type)

# extract all required data from input DataObjects, an input dataset is constructed
dataSet = currentInput.asDataset()
inpVars, outVars, dataSet = currentInput['Data'][0]
try:
inputDataset = dataSet[self.parameters['targets']]
except KeyError:
missing = [var for var in self.parameters['targets'] if var not in dataSet]
self.raiseAnError(KeyError, "Variables: '{}' missing from dataset '{}'!".format(", ".join(missing),currentInput.name))
self.sampleTag = currentInput.sampleTag
self.sampleTag = utils.first(dataSet.dims)

if currentInput.type == 'HistorySet':
if self.dynamic:
dims = inputDataset.sizes.keys()
if self.pivotParameter is None:
if len(dims) > 1:
@@ -212,22 +175,21 @@
requested variables', ','.join(self.parameters['targets']))
else:
self.dynamic = True
if not currentInput.checkIndexAlignment(indexesToCheck=self.pivotParameter):
self.raiseAnError(IOError, "The data provided by the data objects", currentInput.name, "is not synchronized!")
#if not currentInput.checkIndexAlignment(indexesToCheck=self.pivotParameter):
# self.raiseAnError(IOError, "The data provided by the data objects", currentInput.name, "is not synchronized!")
self.pivotValue = inputDataset[self.pivotParameter].values
if self.pivotValue.size != len(inputDataset.groupby(self.pivotParameter)):
msg = "Duplicated values were identified in pivot parameter, please use the 'HistorySetSync'" + \
" PostProcessor to syncronize your data before running 'BasicStatistics' PostProcessor."
self.raiseAnError(IOError, msg)
# extract all required meta data
metaVars = currentInput.getVars('meta')
self.pbPresent = True if 'ProbabilityWeight' in metaVars else False
self.pbPresent = 'ProbabilityWeight' in dataSet
if self.pbPresent:
pbWeights = xr.Dataset()
self.realizationWeight = dataSet[['ProbabilityWeight']]/dataSet[['ProbabilityWeight']].sum()
for target in self.parameters['targets']:
pbName = 'ProbabilityWeight-' + target
if pbName in metaVars:
if pbName in dataSet:
pbWeights[target] = dataSet[pbName]/dataSet[pbName].sum()
elif self.pbPresent:
pbWeights[target] = self.realizationWeight['ProbabilityWeight']
@@ -1211,7 +1173,7 @@ def getCovarianceSubset(desired):
if self.pivotParameter in outputSet.sizes.keys():
outputDict[self.pivotParameter] = np.atleast_1d(self.pivotValue)

return outputDict
return outputDict, outputSet

def corrCoeff(self, covM):
"""
@@ -1375,14 +1337,29 @@ def spearmanCorrelation(self, featVars, targVars, featSamples, targSamples, pbWe
da = xr.DataArray(spearmanMat, dims=('targets','features'), coords={'targets':targVars,'features':featVars})
return da

def _runLegacy(self, inputIn):
"""
This method executes the postprocessor action with the old data format. In this case, it computes all the requested statistical FOMs
@ In, inputIn, object, object containing the data to process (inputToInternal output)
@ Out, outputSet, xarray.Dataset or dictionary, dataset or dictionary containing the results
"""
if type(inputIn).__name__ == 'PointSet':
merged = inputIn.asDataset()
elif 'metadata' in inputIn:
merged = xr.merge([inputIn['metadata'],inputIn['targets']])
else:
merged = xr.merge([inputIn['targets']])
newInputIn = {'Data':[[None,None,merged]]}
return self.run(newInputIn)
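A hedged sketch of the legacy dictionary format this wrapper accepts, matching the _runLegacy calls in LimitSurfaceIntegral and SafestPoint below; stat stands for an already-configured BasicStatistics instance, and the names and values are illustrative.

```python
import numpy as np
import xarray as xr

targets = {'ans': xr.DataArray(np.random.rand(100))}                # illustrative
metadata = {'ProbabilityWeight': xr.DataArray(np.full(100, 0.01))}  # illustrative

# _runLegacy merges these into one xarray.Dataset and forwards it to run()
# inside the new {'Data': [[inpVars, outVars, dataset]]} structure.
result = stat._runLegacy({'metadata': metadata, 'targets': targets})
```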

def run(self, inputIn):
"""
This method executes the postprocessor action. In this case, it computes all the requested statistical FOMs
@ In, inputIn, object, object containing the data to process (inputToInternal output)
@ Out, outputSet, xarray.Dataset or dictionary, dataset or dictionary containing the results
"""
inputData = self.inputToInternal(inputIn)
outputSet = self.__runLocal(inputData)
_, outputSet = self.__runLocal(inputData)
return outputSet
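And the corresponding sketch of the new input structure run() now consumes, following inputToInternal above (again, stat is an assumed, already-configured instance):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({'ans': ('RAVEN_sample_ID', np.random.rand(100))})
# inputToInternal unpacks currentInput['Data'][0] into (inpVars, outVars, dataSet);
# _runLegacy passes None for the variable lists, so the same shape is used here.
outputSet = stat.run({'Data': [[None, None, ds]]})
```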

def collectOutput(self, finishedJob, output):
1 change: 1 addition & 0 deletions framework/Models/PostProcessors/Factory.py
@@ -37,6 +37,7 @@
from .EconomicRatio import EconomicRatio
from .ValidationBase import ValidationBase
from .Validations import Probabilistic
from .Validations import Representativity
from .TSACharacterizer import TSACharacterizer

### PostProcessorFunctions (orig: InterfacedPostProcessor)
4 changes: 2 additions & 2 deletions framework/Models/PostProcessors/LimitSurfaceIntegral.py
@@ -256,9 +256,9 @@ def run(self, input):
f = np.vectorize(self.variableDist[varName].ppf, otypes=[np.float])
randomMatrix[:, index] = f(randomMatrix[:, index])
tempDict[varName] = randomMatrix[:, index]
pb = self.stat.run({'targets':{self.target:xarray.DataArray(self.functionS.evaluate(tempDict)[self.target])}})[self.computationPrefix +"_"+self.target]
pb = self.stat._runLegacy({'targets':{self.target:xarray.DataArray(self.functionS.evaluate(tempDict)[self.target])}})[self.computationPrefix +"_"+self.target]
if self.errorModel:
boundError = abs(pb-self.stat.run({'targets':{self.target:xarray.DataArray(self.errorModel.evaluate(tempDict)[self.target])}})[self.computationPrefix +"_"+self.target])
boundError = abs(pb-self.stat._runLegacy({'targets':{self.target:xarray.DataArray(self.errorModel.evaluate(tempDict)[self.target])}})[self.computationPrefix +"_"+self.target])
else:
self.raiseAnError(NotImplemented, "quadrature not yet implemented")
return pb, boundError
2 changes: 1 addition & 1 deletion framework/Models/PostProcessors/SafestPoint.py
@@ -335,7 +335,7 @@ def run(self, input):
rlz['ProbabilityWeight'][ncLine] = np.prod(probList)
metadata = {'ProbabilityWeight':xarray.DataArray(rlz['ProbabilityWeight'])}
targets = {tar:xarray.DataArray( rlz[tar]) for tar in self.controllableOrd}
rlz['ExpectedSafestPointCoordinates'] = self.stat.run({'metadata':metadata, 'targets':targets})
rlz['ExpectedSafestPointCoordinates'] = self.stat._runLegacy({'metadata':metadata, 'targets':targets})
self.raiseADebug(rlz['ExpectedSafestPointCoordinates'])
return rlz

2 changes: 2 additions & 0 deletions framework/Models/PostProcessors/Validations/Probabilistic.py
@@ -125,6 +125,8 @@ def _getDataFromDataDict(self, datasets, var, names=None):
pw = None
if "|" in var and names is not None:
do, feat = var.split("|")
# doIndex = names.index(do)
# dat = datasets[doIndex][feat]
dat = datasets[do][feat]
else:
for doIndex, ds in enumerate(datasets):
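For reference, a small sketch of the "DataObject|variable" naming convention handled above (all names are made up for illustration):

```python
# Qualified names select a specific dataset; bare names are searched across
# all datasets by the else branch. Names below are hypothetical.
datasets = {'expData': {'reactionRate': [1.0, 2.0, 3.0]}}
var = 'expData|reactionRate'
do, feat = var.split('|')  # do = 'expData', feat = 'reactionRate'
dat = datasets[do][feat]   # -> [1.0, 2.0, 3.0]
```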