diff --git a/README.md b/README.md
index 31a5203b60..d493bf4f36 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 # Raven
+ Risk Analysis Virtual Environment
 
 RAVEN (Risk Analysis Virtual Environment) is one of the many INL-developed software tools researchers can use to identify and increase the safety margin in nuclear reactor systems.
diff --git a/doc/user_manual/install.tex b/doc/user_manual/install.tex
index 93e83ecafe..fb53e37500 100644
--- a/doc/user_manual/install.tex
+++ b/doc/user_manual/install.tex
@@ -50,6 +50,7 @@ \section{RAVEN Dependencies Installation}
  \hline scipy & Scientific computing package for Python \\
  \hline scikit-learn & Machine learning library for Python \\
  \hline matplotlib & Plotting library for Python \\
+ \hline xarray & High-dimension field data containers for Python \\
  \hline
 \end{tabular}
 \end{center}
@@ -64,7 +65,7 @@ \subsection{Preparing a Linux System for RAVEN}
 \label{sysprep_linux}
 
 The installation of RAVEN dependencies on a Linux system can be performed
-using two alternative methods: 
+using two alternative methods:
 
 \begin{itemize}
 \item Native distribution's package manager
@@ -73,7 +74,7 @@ \subsection{Preparing a Linux System for RAVEN}
 
 Using one of the above automates the process and automatically includes any
 needed dependencies of the requested packages. Below are instructions for
-doing so for two popular Linux distributions, Ubuntu and Fedora. 
+doing so for two popular Linux distributions, Ubuntu and Fedora.
 
 \subsubsection{Ubuntu}
@@ -91,7 +92,7 @@ \subsubsection{Ubuntu}
 
 \paragraph{Miniconda}
 
-The Minoconda package manager is a cross platform installation package specifically 
+The Miniconda package manager is a cross-platform installation package specifically
 used for Python dependencies installation. The package manager can be downloaded and
 installed from \url{https://conda.io/miniconda.html}. After the installation of
 Miniconda, the following command needs to be executed for the installation of
@@ -126,12 +127,15 @@ \subsubsection{Fedora}
 python3-devel numpy h5py scipy python-scikit-learn \
 python-matplotlib-qt4
 \end{lstlisting}
+\begin{lstlisting}[language=bash]
+pip install pandas xarray
+\end{lstlisting}
 
 Note: The 'dnf' command replaces 'yum' used on older versions of Fedora Linux.
 
 \paragraph{Miniconda}
 
-The Minoconda package manager is a cross platform installation package specifically 
+The Miniconda package manager is a cross-platform installation package specifically
 used for Python dependencies installation. The package manager can be downloaded and
 installed from \url{https://conda.io/miniconda.html}. After the installation of
 Miniconda, the following command needs to be executed for the installation of
@@ -174,7 +178,7 @@ \subsubsection{Installing XCode Command Line Tools}
 
 \subsubsection{Installing XQuartz}
 XQuartz is an implementation of the X Server for the Mac OSX operating system.
-XQuartz is freely available on the web and can be downloaded from the link 
+XQuartz is freely available on the web and can be downloaded from the link
 \url{https://dl.bintray.com/xquartz/downloads/XQuartz-2.7.9.dmg}.
 \\After downloading, install the package.
@@ -194,7 +198,7 @@ \subsubsection{Install RAVEN libraries}
 matplotlib=1.5.1 python=2.7 hdf5 swig pylint lxml
 \end{lstlisting}
 
-This command will install all the libraries and dependencies needed for executing RAVEN 
+This command will install all the libraries and dependencies needed for executing RAVEN
 in a Miniconda environment called ``raven\_libraries''.
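As a quick sanity check (an editorial suggestion, not an official installation step), the new environment can be verified from Python after running \texttt{source activate raven\_libraries}:

\begin{lstlisting}[language=python]
# Illustration only: confirm the raven_libraries environment provides
# the expected modules and report their versions.
import importlib
for name in ('numpy', 'h5py', 'scipy', 'sklearn', 'matplotlib'):
  module = importlib.import_module(name)
  print('{}: {}'.format(name, getattr(module, '__version__', 'unknown')))
\end{lstlisting}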
\goToRavenInstallation
@@ -227,8 +231,8 @@ \subsubsection{Installation and Configuration of the MSYS2 environment}
  \item Obtain and run the latest basic 64-bit MSYS2 installer from \url{ https://msys2.github.io/} (As of this
  writing it is named msys2-x86\_64-20161025.exe and is approximately 67 Megabytes in size).
  \item The page with the download also contains installation instructions. Perform the steps described there up to
-    step 6 to install a minimal MSYS2 system and bring it up to date. Make sure that you install to path 
-    C:\textbackslash{}msys64. This installation will create shortcuts in the Windows start menu that may be used 
+    step 6 to install a minimal MSYS2 system and bring it up to date. Make sure that you install to path
+    C:\textbackslash{}msys64. This installation will create shortcuts in the Windows start menu that may be used
    to start UNIX-like shells:
    \begin{itemize}
      \item MSYS2 Shell
@@ -242,18 +246,18 @@ \subsubsection{Installation and Configuration of the MSYS2 environment}
 USER@HOSTNAME MINGW64 ~
 $ pacman -S git winpty make
 \end{lstlisting}
-    The package manager will then download and install those packages (and their dependencies) from the MSYS2 
+    The package manager will then download and install those packages (and their dependencies) from the MSYS2
    repository.
 \end{enumerate}

 \subsubsection{Install Python Language and Package Support}
 \begin{enumerate}
-  \item Download the latest 64-bit installer for Windows Python 2.7 from 
-    \url{https://conda.io/miniconda.html} and install it. \item The installer 
+  \item Download the latest 64-bit installer for Windows Python 2.7 from
+    \url{https://conda.io/miniconda.html} and install it. \item The installer
    will ask whether Python should be installed for only the logged-in user or
    for all users. Either option will work for RAVEN.
-  \item Locate and test the Python installation. Open a Windows command prompt and enter the 
-    command "{\it where python}", which attempts to locate a the Python language interpreter 
+  \item Locate and test the Python installation. Open a Windows command prompt and enter the
+    command "{\it where python}", which attempts to locate the Python language interpreter
    in the current system path. This looks like:

 \begin{lstlisting}[language=bash, basicstyle=\small]
@@ -261,14 +265,14 @@ \subsubsection{Install Python Language and Package Support}
 C:\Users\USERID\AppData\Local\Continuum\Miniconda2\python.exe
 \end{lstlisting}

-  \item Setup MSYS2 to find Python. MSYS2 has its own separate PATH which must also be adjusted 
-    so that Python and its associated tools may be found. This is done by converting the 
-    file system location of Python determined in the previous step to its MSYS2-compatible 
+  \item Set up MSYS2 to find Python. MSYS2 has its own separate PATH which must also be adjusted
+    so that Python and its associated tools may be found. This is done by converting the
+    file system location of Python determined in the previous step to its MSYS2-compatible
    equivalent and using the result to set up MSYS2 so that it too can find it in the future.
    \newline \newline
-    This is done by turning all backslashes ('\textbackslash') in the path to be converted to 
-    forward slashes ('/'), and changing the drive letter from its '\textless letter\textgreater:' 
-    form to '/ \textless letter\textgreater'. 
+    This is done by turning all backslashes ('\textbackslash') in the path to be converted to
+    forward slashes ('/'), and changing the drive letter from its '\textless letter\textgreater:'
+    form to '/ \textless letter\textgreater'.
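Since the conversion is mechanical, it can also be scripted. The following Python sketch is an editorial illustration only (not part of the official instructions); it applies the slash and drive-letter rules above plus the space-escaping rule described next:

\begin{lstlisting}[language=python]
# Illustration only: convert a Windows path to its MSYS2-compatible form.
def to_msys2(path):
  drive, rest = path.split(':', 1)  # 'C:\...' -> ('C', '\...')
  rest = rest.replace('\\', '/')    # backslashes become forward slashes
  rest = rest.replace(' ', '\\ ')   # escape spaces (rule described below)
  return '/' + drive.lower() + rest

print(to_msys2(r'C:\Program Files\Common Files'))
# prints: /c/Program\ Files/Common\ Files
\end{lstlisting}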
In addition, any spaces in the path must + This is done by turning all backslashes ('\textbackslash') in the path to be converted to + forward slashes ('/'), and changing the drive letter from its '\textless letter\textgreater:' + form to '/ \textless letter\textgreater'. In addition, any spaces in the path must be escaped using a backslash ('\textbackslash') when converted. \newline \newline For example: @@ -289,13 +293,13 @@ \subsubsection{Install Python Language and Package Support} /c/Program\ Files/Common\ Files \end{lstlisting} \medskip - Three separate paths must be added to MSYS2 to enable all of the Python tools needed + Three separate paths must be added to MSYS2 to enable all of the Python tools needed to be found. These are: \smallskip \begin{tabular}{| l | l |} \hline - {\bf Path} & {\bf Purpose} \\\hline + {\bf Path} & {\bf Purpose} \\\hline \textless Converted path from above\textgreater & Python executable \\\hline \textless Converted path from above\textgreater /Scripts & Conda (Needed to manage Python packages) \\\hline \textless Converted path from above\textgreater /Library/bin & Swig (Needed to build RAVEN) \\\hline @@ -310,20 +314,20 @@ \subsubsection{Install Python Language and Package Support} export PATH=/c/Users/USERID/AppData/Local/Continuum/Miniconda2/Scripts:$PATH export PATH=/c/Users/USERID/AppData/Local/Continuum/Miniconda2/Library/bin:$PATH \end{lstlisting} - + To configure these needed paths in MSYS2 so that they persist, file "~/.bashrc" will need - to be edited. This may be done either using an MSYS2-based editor such as {\it vim} - (VI-iMproved, which is included in the installation) or a Windows-based editor like - {\it Wordpad} (included with Windows). Another excellent open source editor for + to be edited. This may be done either using an MSYS2-based editor such as {\it vim} + (VI-iMproved, which is included in the installation) or a Windows-based editor like + {\it Wordpad} (included with Windows). Another excellent open source editor for Windows is {\it Notepad++} \url{https://notepad-plus-plus.org/}, which is also good for editing RAVEN input files. - \item Test Python in MSYS2. At this point open a new MSYS2 shell window and see if Python is + \item Test Python in MSYS2. At this point open a new MSYS2 shell window and see if Python is now found in the PATH: - \newline - Note: Due to the way that Python interacts with the MSYS2 shell, when using Python by - itself in MSYS2 the {\it winpty} utility is provided. (If Python is run without winpty, - it may appear to sit there and do nothing. Pressing \textless Ctrl\textgreater -C will + \newline + Note: Due to the way that Python interacts with the MSYS2 shell, when using Python by + itself in MSYS2 the {\it winpty} utility is provided. (If Python is run without winpty, + it may appear to sit there and do nothing. Pressing \textless Ctrl\textgreater -C will interrupt it.) @@ -339,9 +343,9 @@ \subsubsection{Install Python Language and Package Support} \end{lstlisting} - \item Install needed Python packages. RAVEN requires several Python packages to function properly. - Now the {\it conda} command will be used to download and install them in an automated manner. The - following asks {\it conda} to obtain the specified versions of the listed packages, as well as all + \item Install needed Python packages. RAVEN requires several Python packages to function properly. + Now the {\it conda} command will be used to download and install them in an automated manner. 
The
+    following asks {\it conda} to obtain the specified versions of the listed packages, as well as all
    of their dependencies.

 \smallskip
@@ -355,35 +359,35 @@ \subsubsection{Install Python Language and Package Support}
 \subsubsection{Compiler Installation and Configuration}
 \begin{enumerate}
-  \item Download and install Visual Studio. A C++ language compiler that supports C++11 features 
-    is needed to perform this step. Microsoft's Visual Studio Community Edition is free and 
-    available from \url{https://www.visualstudio.com/downloads/}. 
-    
-    The current version (as of this writing) is 2017. The 2015 and 2017 versions have been 
-    successfully used to build RAVEN. Professional and Enterprise versions of these will 
-    also work. If one of these is already present on your system, it is not necessary to 
-    obtain another one. Note that because C++11 language features are required, the 
+  \item Download and install Visual Studio. A C++ language compiler that supports C++11 features
+    is needed to perform this step. Microsoft's Visual Studio Community Edition is free and
+    available from \url{https://www.visualstudio.com/downloads/}.
+
+    The current version (as of this writing) is 2017. The 2015 and 2017 versions have been
+    successfully used to build RAVEN. Professional and Enterprise versions of these will
+    also work. If one of these is already present on your system, it is not necessary to
+    obtain another one. Note that because C++11 language features are required, the
    "Microsoft Visual C++ Compiler for Python 2.7" often used for building Python add-ons
    will {\bf not} work.

-    After downloading and running the Visual Studio installer, it will ask what features 
-    to install. For building RAVEN, "Desktop development with C++" is needed at a minimum. 
+    After downloading and running the Visual Studio installer, it will ask what features
+    to install. For building RAVEN, "Desktop development with C++" is needed at a minimum.
    Installation of other Visual Studio features should be fine.

-  \item Let the build system know where to find the compiler. When the build system attempts 
-    to search for an installed compiler, this process often fails with the error message 
-    "Unable to find vcvarsall.bat". This happens because Python version 2.7 has not been 
-    updated to automatically locate modern Visual Studio installations. To solve this it 
-    is necessary to help the Python build system find the C++ compiler on the system. 
-    The easiest way to do this is create a Windows batch (.BAT) file that will redirect 
-    the build system to the information it needs. First, locate the file VCVARSALL.BAT 
-    file installed as part of Visual Studio on your system. 
+  \item Let the build system know where to find the compiler. When the build system attempts
+    to search for an installed compiler, this process often fails with the error message
+    "Unable to find vcvarsall.bat". This happens because Python version 2.7 has not been
+    updated to automatically locate modern Visual Studio installations. To solve this it
+    is necessary to help the Python build system find the C++ compiler on the system.
+    The easiest way to do this is to create a Windows batch (.BAT) file that will redirect
+    the build system to the information it needs. First, locate the VCVARSALL.BAT
+    file installed as part of Visual Studio on your system.
This location will usually + be something like the following: \smallskip \begin{tabular}{| l | l |} \hline - {\bf Visual Studio Version} & {\bf Directory containing VCVARSALL.BAT} \\\hline + {\bf Visual Studio Version} & {\bf Directory containing VCVARSALL.BAT} \\\hline 2015 & C:\textbackslash Program Files (x86)\textbackslash Microsoft Visual Studio 14.0\textbackslash VC \\\hline 2017 & C:\textbackslash Program Files (x86)\textbackslash Microsoft Visual \\ & Studio\textbackslash 2017\textbackslash Community\textbackslash VC\textbackslash @@ -391,8 +395,8 @@ \subsubsection{Compiler Installation and Configuration} \end{tabular} \medskip - Once the target file has been located it is necessary to create a couple of directories - and one file. The first directory created must be named "VC" and should be created + Once the target file has been located it is necessary to create a couple of directories + and one file. The first directory created must be named "VC" and should be created somewhere outside of the RAVEN source tree (such as your MinGW home directory): \begin{lstlisting}[language=bash] @@ -400,7 +404,7 @@ \subsubsection{Compiler Installation and Configuration} $ mkdir VC \end{lstlisting} - The next directory to be created must be inside the one just created. It is suggested + The next directory to be created must be inside the one just created. It is suggested to name it "target", because it is there that we will point the Python build system: \begin{lstlisting}[language=bash] @@ -416,9 +420,9 @@ \subsubsection{Compiler Installation and Configuration} \end{lstlisting} - The file to be created is named "VCVARSALL.BAT", and it must be written in the VC - directory that was just made. The Python build system will be configured to find this - file, which then redirects it to the actual file. Use a text editor (such as + The file to be created is named "VCVARSALL.BAT", and it must be written in the VC + directory that was just made. The Python build system will be configured to find this + file, which then redirects it to the actual file. Use a text editor (such as {\it vim} or {\it notepad} as described above) to create the file VCVARSALL.BAT: \begin{lstlisting}[language=bash] @@ -439,24 +443,24 @@ \subsubsection{Compiler Installation and Configuration} CALL "" %1 %2 %3 %4 %5 \end{lstlisting} - For example, in the case of Visual Studio 2017 Community installed in the default + For example, in the case of Visual Studio 2017 Community installed in the default location this would be: \begin{lstlisting}[language=bash, basicstyle=\tiny] CALL "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\VCVARSALL.BAT" %1 %2 %3 %4 %5 \end{lstlisting} - Note the double quotes around the path and file name. These are necessary because - there are spaces in some of the directory names that make up the full location of + Note the double quotes around the path and file name. These are necessary because + there are spaces in some of the directory names that make up the full location of VCVARSALL.BAT. - After creating the new {\it VCVARSALL.BAT} in the directory {\it VC}, one more thing - needs to be done to inform the Python build system where this file just created is. - During the build process, an {\it environment variable} "VS90COMNTOOLS" will be checked. - The value of VS90COMNTOOLS will need to be set to the {\it target} directory just below - the location of VCVARSALL.BAT file just created. 
+    After creating the new {\it VCVARSALL.BAT} in the directory {\it VC}, one more thing
+    needs to be done to inform the Python build system where this newly created file is located.
+    During the build process, an {\it environment variable} "VS90COMNTOOLS" will be checked.
+    The value of VS90COMNTOOLS will need to be set to the {\it target} directory just below
+    the location of the VCVARSALL.BAT file just created.

-    For example, if VCVARSALL.BAT was created in directory VC under your MinGW home 
+    For example, if VCVARSALL.BAT was created in directory VC under your MinGW home
    directory, the variable VS90COMNTOOLS should point to \textasciitilde /VC/target.

 \begin{lstlisting}[language=bash]
@@ -494,7 +498,7 @@ \subsection{Manual Dependency Install}

 \subsection{How RAVEN finds Dependencies}

-RAVEN, when run from either \texttt{raven\_framework} or in 
+RAVEN, when run from either \texttt{raven\_framework} or
 \texttt{run\_tests}, runs a script called\\
 \texttt{setup\_raven\_libs} which sets up the dependencies if they
 are not already present.
diff --git a/doc/user_manual/model.tex b/doc/user_manual/model.tex
index 93eb6f7717..00ab1e6399 100644
--- a/doc/user_manual/model.tex
+++ b/doc/user_manual/model.tex
@@ -29,13 +29,13 @@ \section{Models}
  These aliases can be used anywhere in the RAVEN input to refer to the #1
  variables.
  %
-  In the body of this node the user specifies the name of the variable that the model is going to use 
+  In the body of this node the user specifies the name of the variable that the model is going to use
  (during its execution).
  %
  The actual alias, usable throughout the RAVEN input, is instead defined in the
-  \xmlAttr{variable} attribute of this tag. 
+  \xmlAttr{variable} attribute of this tag.
  \\The user can specify aliases for both the input and the output space. As a sanity check, RAVEN
-  requires an additional required attribute \xmlAttr{type}. This attribute can be either ``input'' or ``output''. 
+  requires an additional attribute \xmlAttr{type}. This attribute can be either ``input'' or ``output''.
  %
  \nb The user can specify as many aliases as needed.
  %
@@ -900,7 +900,7 @@ \subsection{Dummy}
      ...
-      
+
         another_name_for_this_variable_in_the_model
@@ -1311,14 +1311,14 @@ \subsection{EnsembleModel}
   \nb  All the inputs here specified need to be listed in the Steps where the EnsembleModel is used.
   \item \xmlNode{Output}, \xmlDesc{string, optional field},
-   represents the output entities that need to be linked to this sub-model. \nb The \xmlNode{Output}s here specified are not part 
+   represents the output entities that need to be linked to this sub-model. \nb The \xmlNode{Output}s here specified are not part
   of the determination of the EnsembleModel execution but represent an additional storage of results from the
-   sub-models. For example, if the \xmlNode{TargetEvaluation} is of type PointSet (since just scalar data needs to be transferred to other 
+   sub-models. For example, if the \xmlNode{TargetEvaluation} is of type PointSet (since just scalar data needs to be transferred to other
   models) and the sub-model is able to also output history-type data, this Output can be of type HistorySet.
  Note that the structure of each Output dataObject must include only variables (either input
  or output) that are defined within the model. As an example, the Output dataObjects cannot
  contain variables that are defined at the Ensemble model level.
   %
-   The user can specify as many \xmlNode{Output} (s) as needed. 
+   The user can specify as many \xmlNode{Output} (s) as needed.
The optional \xmlNode{Output}s can be of both classes ``DataObjects'' and ``Databases''
   (e.g. PointSet, HistorySet, HDF5). \nb \textbf{The \xmlNode{Output} (s) here specified MUST be listed in the Step in which the EnsembleModel is used.}
 \end{itemize}
@@ -1334,16 +1334,16 @@ \subsection{EnsembleModel}
 %
 \begin{itemize}
  \item \xmlNode{maxIterations}, \xmlDesc{integer, optional field},
-  maximum number of Picard's iteration to be performed (in case the iteration scheme does 
+  maximum number of Picard iterations to be performed (in case the iteration scheme does
  not previously converge). \default{30};
  \item \xmlNode{tolerance}, \xmlDesc{float, optional field},
-  convergence criterion. It represents the L2 norm residue below which the Picard's iterative scheme is 
+  convergence criterion. It represents the L2 norm of the residual below which the Picard iterative scheme is
 considered converged. \default{0.001};
  \item \xmlNode{initialConditions}, \xmlDesc{XML node, required parameter (if Picard's activated)},
  Within this sub-node, the initial conditions for the input variables (that are part of a loop)
  need to be specified in sub-nodes named with the variable name (e.g. \xmlNode{varName}). The body of the
-  \xmlNode{varName} contains the value of the initial conditions (scalar or arrays, depending of the 
-  type of variable). If an array needs to be inputted, the user can specify the attribute \xmlAttr{repeat} 
+  \xmlNode{varName} contains the value of the initial conditions (scalar or arrays, depending on the
+  type of variable). If an array needs to be inputted, the user can specify the attribute \xmlAttr{repeat}
  and the code is going to repeat for \xmlAttr{repeat}-times the value inputted in the body.
  \item \xmlNode{initialStartModels}, \xmlDesc{XML node, only required parameter when Picard's iteration is activated},
  specifies the list of models that will be initially executed. \nb Do not input this node for non-Picard calculations,
@@ -1407,7 +1407,7 @@ \subsection{EnsembleModel}
        45.0
-      
+
      thermalConductivityComputation
diff --git a/doc/user_manual/postprocessor.tex b/doc/user_manual/postprocessor.tex
index 2e30173753..d8d6f510be 100644
--- a/doc/user_manual/postprocessor.tex
+++ b/doc/user_manual/postprocessor.tex
@@ -102,6 +102,7 @@ \subsubsection{BasicStatistics}
  the \textbf{variationCoefficient} will be \textbf{INF}.
  \item \textbf{skewness}: skewness
  \item \textbf{kurtosis}: excess kurtosis (also known as Fisher's kurtosis)
+  \item \textbf{samples}: the number of samples in the data set used to determine the statistics.
 \end{itemize}
 The matrix quantities available for request are:
 \begin{itemize}
@@ -111,7 +112,6 @@ \subsubsection{BasicStatistics}
  \item \textbf{NormalizedSensitivity}: matrix of normalized sensitivity coefficients. \nb{It
  is the matrix of normalized VarianceDependentSensitivity}
  \item \textbf{VarianceDependentSensitivity}: matrix of sensitivity coefficients dependent
  on the variance of the variables
-  \item \textbf{samples}: the number of samples in the data set used to determine the statistics.
 \end{itemize}
 If all the quantities need to be computed, this can be done through the \xmlNode{all} node,
 which requires the \xmlNode{targets} and \xmlNode{features} sub-nodes.
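For intuition only, the scalar quantities above (including the newly added \textbf{samples}) can be reproduced for a single variable with off-the-shelf tools; this sketch uses synthetic data and is an illustration, not the BasicStatistics implementation:

\begin{lstlisting}[language=python]
# Illustration only: scalar quantities for one variable, synthetic data.
import numpy as np
from scipy import stats

x = np.random.randn(1000)            # hypothetical samples of one target
mean  = np.mean(x)
sigma = np.std(x, ddof=1)            # sample standard deviation
variationCoefficient = sigma / mean  # INF if the mean is zero, as noted above
skewness = stats.skew(x)             # skewness
kurtosis = stats.kurtosis(x)         # excess (Fisher) kurtosis
samples  = x.size                    # the new 'samples' quantity
\end{lstlisting}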
@@ -1289,10 +1289,10 @@ \subsubsection{Interfaced}
 \paragraph{Method: dataObjectLabelFilter}
 This Post-Processor allows the user to filter the portion of a dataObject, either PointSet or HistorySet, with a given clustering label.
-A clustering algorithm associates a unique cluster label to each element of the dataObject (PointSet or HistorySet). 
-This cluster label is a natural number ranging from $0$ (or $1$ depending on the algorithm) to $N$ where $N$ is the number of obtained clusters. 
-Recall that some clustering algorithms (e.g., K-Means) receive $N$ as input while others (e.g., Mean-Shift) determine $N$ after clustering has been performed. 
-Thus, this Post-Processor is naturally employed after a data-mining clustering techniques has been performed on a dataObject so that each clusters 
+A clustering algorithm associates a unique cluster label to each element of the dataObject (PointSet or HistorySet).
+This cluster label is a natural number ranging from $0$ (or $1$ depending on the algorithm) to $N$ where $N$ is the number of obtained clusters.
+Recall that some clustering algorithms (e.g., K-Means) receive $N$ as input while others (e.g., Mean-Shift) determine $N$ after clustering has been performed.
+Thus, this Post-Processor is naturally employed after a data-mining clustering technique has been performed on a dataObject so that each cluster
 can be analyzed separately.

 In the \xmlNode{PostProcessor} input block, the following XML sub-nodes are required,
@@ -1320,10 +1320,10 @@ \subsubsection{Interfaced}
 The user is required to provide the following information:
 \begin{itemize}
-  \item the set of input variables. For each variable the following need to be specified: 
+  \item the set of input variables. For each variable the following need to be specified:
   \begin{itemize}
     \item the set of values that imply a reliability value equal to $1$ for the input variable
-     \item the set of values that imply a reliability value equal to $0$ for the input variable 
+     \item the set of values that imply a reliability value equal to $0$ for the input variable
   \end{itemize}
  \item the output target variable. For this variable it is necessary to specify the values of the output target variable that define the desired outcome.
 \end{itemize}
@@ -1333,11 +1333,11 @@ \subsubsection{Interfaced}
 \item $R_0$ Probability of the outcome of the output target variable (nominal value)
 \item $R^{+}_i$ Probability of the outcome of the output target variable if reliability of the input variable is equal to $0$
 \item $R^{-}_i$ Probability of the outcome of the output target variable if reliability of the input variable is equal to $1$
-\end{itemize} 
+\end{itemize}
 Available measures are:
 \begin{itemize}
-  \item Risk Achievement Worth (RAW): $RAW = R^{+}_i / R_0 $ 
+  \item Risk Achievement Worth (RAW): $RAW = R^{+}_i / R_0 $
  \item Risk Reduction Worth (RRW): $RRW = R_0 / R^{-}_i$
  \item Fussell-Vesely (FV): $FV = (R_0 - R^{-}_i) / R_0$
  \item Birnbaum (B): $B = R^{+}_i - R^{-}_i$
@@ -1358,7 +1358,7 @@ \subsubsection{Interfaced}
 \end{itemize}

 \textbf{Example:}
-This example shows an example where it is desired to calculate all available risk importance measures for two input variables (i.e., pumpTime and valveTime) 
+This example shows how to calculate all available risk importance measures for two input variables (i.e., pumpTime and valveTime)
 given an output target variable (i.e., Tmax).
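Before the detailed setup below, a purely numerical sketch of the four measures just defined; all probability values here are made up for illustration:

\begin{lstlisting}[language=python]
# Illustration only: risk importance measures with hypothetical values.
R0     = 0.01   # nominal probability of the outcome
Rplus  = 0.05   # outcome probability when the component reliability is 0
Rminus = 0.002  # outcome probability when the component reliability is 1

RAW = Rplus / R0          # Risk Achievement Worth
RRW = R0 / Rminus         # Risk Reduction Worth
FV  = (R0 - Rminus) / R0  # Fussell-Vesely
B   = Rplus - Rminus      # Birnbaum
\end{lstlisting}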
A value of the input variable pumpTime in the interval $[0,240]$ implies a reliability value of the input variable pumpTime equal to $0$.
A value of the input variable valveTime in the interval $[0,60]$ implies a reliability value of the input variable valveTime equal to $0$.
@@ -1375,14 +1375,14 @@ \subsubsection{Interfaced}
      pumpTime
      valveTime
      Tmax
-      
+
    ...
  ...
 \end{lstlisting}

-This Post-Processor allows the user to consider also multiple datasets (a data set for each initiating event) and calculate the global risk importance measures. 
+This Post-Processor also allows the user to consider multiple datasets (a data set for each initiating event) and calculate the global risk importance measures.
This can be performed by:
 \begin{itemize}
  \item Including all datasets in the step
@@ -1418,7 +1418,7 @@ \subsubsection{Interfaced}
      outcome
      outRun1
      outRun2
-      
+
    ...
  ...
@@ -1428,8 +1428,8 @@ \subsubsection{Interfaced}
 \end{itemize}
 This post-processor can be made time-dependent if a single HistorySet is provided among the other data objects.
-The HistorySet contains the temporal profiles of a subset of the input variables. This temporal profile can be only 
-boolean, i.e., 0 (component offline) or 1 (component online). 
+The HistorySet contains the temporal profiles of a subset of the input variables. This temporal profile can only be
+boolean, i.e., 0 (component offline) or 1 (component online).
 Note that the provided history set must contain a single History; multiple Histories are not allowed.
 When this post-processor is in a dynamic configuration (i.e., time-dependent), the user is required to
 specify an xml node \xmlNode{temporalID} that indicates the ID of the temporal variable.
@@ -1450,12 +1450,12 @@ \subsubsection{Interfaced}
      outcome
      outRun1
      time
-      
+
    ...
  ...
-  ... 
+  ...
    outRun1
    timeDepProfiles
@@ -1505,6 +1505,11 @@ \subsubsection{RavenOutput}
  file. This will appear as an entry in the output \xmlNode{DataObject} and the corresponding
  column contains the values extracted from this file. If not specified, RAVEN will attempt to find
  a suitable integer ID to use, and a warning will be raised.
+
+  When defining the \xmlNode{DataObject} that this postprocessor will write to, and when using the static
+  (non-\xmlNode{dynamic}) form of the postprocessor, the \xmlNode{input} space should be given as
+  \xmlString{ID}, and the output variables should be the outputs specified in the postprocessor. See the
+  examples below. In the data object, the variable values will be keyed on the \xmlString{ID} parameter.
 \end{itemize}
 Each value that needs to be extracted from the file needs to be specified by one of the following
 \xmlNode{output} nodes within the \xmlNode{File} node:
@@ -1545,7 +1550,11 @@ \subsubsection{RavenOutput}
 \end{lstlisting}

-The RAVEN input to extract this information would appear as follows: 
+
+The RAVEN input to extract this information would appear as follows.
+We include an example of defining the \xmlNode{DataObject} that this postprocessor will write out to, for
+further clarity.
+
 \begin{lstlisting}[style=XML]
  ...
@@ -1556,7 +1565,7 @@
    ...
    ...
-      
+
      ans|val1
      ans|val2
@@ -1569,6 +1578,15 @@
  ...
  ...
+  
+    ...
+    
+      ID
+      first,second
+    
+    ...
+  
  ...
\end{lstlisting}
diff --git a/framework/DataObjects/Factory.py b/framework/DataObjects/Factory.py
index 159fa518cf..3de4a356c3 100644
--- a/framework/DataObjects/Factory.py
+++ b/framework/DataObjects/Factory.py
@@ -29,6 +29,7 @@
 from DataObjects.Data import Data
 from DataObjects.PointSet import PointSet
 from DataObjects.HistorySet import HistorySet
+from DataObjects.XrDataObject import DataSet
 ## [ Add new class here ]
 ################################################################################
 ## Alternatively, to fully automate this file:
@@ -45,6 +46,8 @@
 for classObj in utils.getAllSubclasses(eval(__base)):
   __interFaceDict[classObj.__name__] = classObj
+# TODO hack add-on
+__interFaceDict['DataSet'] = DataSet

 def knownTypes():
   """
diff --git a/framework/DataObjects/TestDataSets.py b/framework/DataObjects/TestDataSets.py
new file mode 100644
index 0000000000..de0a2aeadf
--- /dev/null
+++ b/framework/DataObjects/TestDataSets.py
@@ -0,0 +1,125 @@
+# Copyright 2017 Battelle Energy Alliance, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+  This Module performs Unit Tests for the DataSet classes.
+  It cannot be considered part of the active code but of the regression test system
+"""
+
+#For future compatibility with Python 3
+from __future__ import division, print_function, unicode_literals, absolute_import
+import warnings
+warnings.simplefilter('default',DeprecationWarning)
+
+import xml.etree.ElementTree as ET
+import sys, os
+import pickle as pk
+import numpy as np
+frameworkDir = os.path.dirname(os.path.abspath(os.path.join(sys.argv[0],'..')))
+
+sys.path.append(frameworkDir)
+from utils.utils import find_crow
+
+find_crow(frameworkDir)
+
+import XrDataObject # FIXME
+import MessageHandler
+
+mh = MessageHandler.MessageHandler()
+mh.initialize({'verbosity':'debug'})
+
+print(XrDataObject)
+
+def createElement(tag,attrib=None,text=None):
+  """
+    Method to create a dummy xml element readable by the data object classes
+    @ In, tag, string, the node tag
+    @ In, attrib, dict, optional, the attributes of the xml node
+    @ In, text, str, optional, the text that should be placed in the xml node
+    @ Out, element, xml.etree.ElementTree.Element, the constructed xml element
+  """
+  if attrib is None:
+    attrib = {}
+  if text is None:
+    text = ''
+  element = ET.Element(tag,attrib)
+  element.text = text
+  return element
+
+results = {"pass":0,"fail":0}
+
+def checkAnswer(comment,value,expected,tol=1e-10):
+  """
+    This method compares two floats within a given tolerance
+    @ In, comment, string, a comment printed out if it fails
+    @ In, value, float, the value to compare
+    @ In, expected, float, the expected value
+    @ In, tol, float, optional, the tolerance
+    @ Out, None
+  """
+  if abs(value - expected) > tol:
+    print("checking answer",comment,value,"!=",expected)
+    results["fail"] += 1
+  else:
+    results["pass"] += 1
+
+#Test module methods #TODO
+#print(Distributions.knownTypes())
+#Test error
+#try:
+#  Distributions.returnInstance("unknown",'dud')
+#except:
+#  print("error worked")
+
+#############
+# Point Set #
+#############
+
+xml =
createElement('DataSet',attrib={'name':'test'}) +xml.append(createElement('Input',text='a,b,c')) +xml.append(createElement('Output',text='x,y,z')) + +# check construction +# check builtins +# check basic property getters + +# check appending (add row) +# check add var (add column) +# check remove var (remove column) +# check remove sample (remove row) + +# check slicing +# # var vals +# # realization vals +# # by meta +# check find-by-index +# check find-by-value (including find-by-meta) + +# check write to CSV +# check write to netCDF +# check load from CSV +# check load from netCDF + + +print(results) + +sys.exit(results["fail"]) +""" + + framework.test_datasets + talbpaul + 2017-10-20 + DataSet + + This test is a Unit Test for the DataSet classes. + + +""" diff --git a/framework/DataObjects/XrDataObject.py b/framework/DataObjects/XrDataObject.py new file mode 100644 index 0000000000..2227ffcfe7 --- /dev/null +++ b/framework/DataObjects/XrDataObject.py @@ -0,0 +1,243 @@ +import sys,os +import __builtin__ + +import abc +import numpy as np +import pandas as pd +import xarray as xr +from netCDF4 import Dataset as ncDS + +from BaseClasses import BaseType +from utils import utils, cached_ndarray, InputData + +# for profiling with kernprof +try: + __builtin__.profile +except AttributeError: + # profiler not preset, so pass through + def profile(func): return func + +class DataObjectsCollection(InputData.ParameterInput): + """ + Class for reading in a collection of data objects. + """ +DataObjectsCollection.createClass("DataObjects") + +class DataObject(utils.metaclass_insert(abc.ABCMeta,BaseType)): + """ + Base class. Data objects are RAVEN's method for storing data internally and passing it from one + RAVEN entity to another. Fundamentally, they consist of a collection of realizations, each of + which contains inputs, outputs, and pointwise metadata. In addition, the data object has global + metadata. The pointwise inputs and outputs could be floats, time-dependent, or ND-dependent variables. + """ + ### INPUT SPECIFICATION ### + @classmethod + def getInputSpecification(cls): + """ + Method to get a reference to a class that specifies the input data for class "cls". + @ In, cls, the class for which we are retrieving the specification + @ Out, inputSpecification, InputData.ParameterInput, class to use for specifying the input of cls. + """ + inputSpecification = super(DataObject,cls).getInputSpecification() + inputSpecification.addParam('type', param_type = InputData.StringType, required = False) + inputSpecification.addSub(InputData.parameterInputFactory('Input',contentType=InputData.StringType)) + inputSpecification.addSub(InputData.parameterInputFactory('Output',contentType=InputData.StringType)) + return inputSpecification + # TODO on-disk, etc + + def __init__(self): #TODO message handler + """ + Constructor. 
+      @ In, None
+      @ Out, None
+    """
+    BaseType.__init__(self)
+    self._inputs    = []    # list(str) of input variables
+    self._outputs   = []    # list(str) of output variables
+    self._metavars  = []    # list(str) of POINTWISE metadata variables
+    self._data      = None  # underlying data structure
+    self._collector = None  # object used to collect samples
+    self._hierarchical = False # if True, non-traditional format (not yet implemented)
+
+  def _readMoreXML(self,xmlNode):
+    """
+      Initializes data object based on XML input
+      @ In, xmlNode, xml.etree.ElementTree.Element, input information
+      @ Out, None
+    """
+    pass
+
+  def add_realization(self,info_dict):
+    """
+      Adds one realization to this data object (specializations implement the storage).
+      @ In, info_dict, dict, {var:value} map for a single realization
+      @ Out, None
+    """
+    pass
+
+  def get_data(self):
+    """
+      Returns the underlying data structure.
+      @ In, None
+      @ Out, get_data, object, the underlying data structure
+    """
+    return self._data
+
+  def read(self,fname):
+    """
+      Reads a dataset from a netCDF file.
+      @ In, fname, str, path to the file to read
+      @ Out, read, xr.Dataset, the loaded dataset
+    """
+    return xr.open_dataset(fname)
+#
+#
+#
+#
+class DataSet(DataObject):
+  """
+    DataObject developed Oct 2017 to obtain linear performance from data objects when appending, over
+    thousands of variables and millions of samples.  Wraps np.ndarray for collecting and uses xarray.Dataset
+    for final form.
+  """
+  def __init__(self):#, in_vars, out_vars, meta_vars=None, dynamic=False, var_dims=None,cacheSize=100,prealloc=False):
+    """
+      Constructor.
+    """
+    DataObject.__init__(self)
+    #self.vars = self.in_vars + self.out_vars
+    #if self.dynamic:
+    #  self._data = cached_ndarray.cNDarray(width=len(self.vars),dtype=object)
+    #else:
+    #  self._data = cached_ndarray.cNDarray(width=len(self.vars))
+
+  @property
+  def vars(self):
+    """
+      Property to access all the pointwise variables being controlled by this data object.
+      @ In, None
+      @ Out, vars, list(str), variable names list
+    """
+    return self._inputs + self._outputs + self._metavars
+
+  def __len__(self):
+    """
+      Overloads the len() operator.
+      @ In, None
+      @ Out, int, number of samples in this dataset
+    """
+    pass #TODO
+
+  def _readMoreXML(self,xmlNode):
+    """
+      Initializes data object based on XML input
+      @ In, xmlNode, xml.etree.ElementTree.Element, input information
+      @ Out, None
+    """
+    inp = DataSet.getInputSpecification()()
+    print('Node:',xmlNode)
+    inp.parseNode(xmlNode)
+    for child in inp.subparts:
+      if child.getName() == 'Input':
+        self._inputs.extend(child.value.split(','))
+      elif child.getName() == 'Output':
+        self._outputs.extend(child.value.split(','))
+
+  # API TRANSLATION
+  # OLD                          | NEW
+  # addOutput                    | ? load from values
+  # getAllMetadata               | ? -remove-
+  # getHierParam                 | ? hierarchical only
+  # getInitParams                | ? useful?
+  # getInpParametersValues       | ? getInputValues
+  # getMatchingRealization       | ? same
+  # getMetadata                  | ? getPointMeta, getGeneralMeta
+  # getOutParametersValues       | ? getOutputValues
+  # getParaKeys                  | ? getInputs, getOutputs, getPointMeta, getGeneralMeta
+  # getParam                     | ? getVarValues
+  # getParametersValues          | ? getInputs, getOutputs, getPointMeta, getGeneralMeta
+  # getRealization               | ? by index, by value, also asDataset or NOT (for reading)
+  # isItEmpty                    | ? size
+  # loadXMLandCSV                | ? loadFromCSV
+  # printCSV                     | ? writeCSV
+  # _writeUnstructuredInputInXML | ? writeMetaXML
+  # removeInputValue             | ? removeVariable
+  # removeOutputValue            | ? removeVariable
+  # resetData                    | ? reset
+  # retrieveNodeInTreeMode       | ? hierarchical only
+  # sizeData                     | ? size
+  # updateInputValue             | addRealization
+  # updateOutputValue            | addRealization
+  # updateMetadata               | addRealization, addGlobalMeta
+  # addNodeInTreeMode            | ? hierarchical only
+  # _createXMLFile               | ? writeMetaXML
+  # _loadXMLFile                 | ? readMetaXML
+  # _readMoreXML                 | same
+  # _specializedInputCheck       | ? remove
+  # _specializedLoadXMLandCSV    | ?
loadFromCSV
+  # __getVariablesToPrint        | ? remove
+  # __getMetadataType            | ? remove
+
+  ### NEW API ###
+  def addRealization(self,rlz):
+    """
+      Adds a "row" (or "sample") to this data object.
+      This is the preferred method to add data to this data object.
+      @ In, rlz, dict, {var:val} format where
+                 "var" is the variable name as a string,
+                 "val" can be either a float (pointset) or xr.DataArray object (ndset)
+      @ Out, None
+    """
+    # FIXME TODO dynamic; NOTE "dynamic" is not yet set in the constructor, so default to static collection
+    if getattr(self,'dynamic',False):
+      self._data.append(np.asarray([list(rlz[var] for var in self.vars)],dtype=object))
+    else:
+      self._data.append(np.asarray([list(rlz[var] for var in self.vars)]))
+
+  def asDataset(self):
+    """
+      Casts this dataobject as an xr.Dataset.
+      Functionally, typically collects the data from self._collector and places it in self._data.
+      Efficiency note: this is the slowest part of typical data collection.
+      @ In, None
+      @ Out, xarray.Dataset, all the data from this data object.
+    """
+    # FIXME for collector / data management system
+    # if nothing to collect, do nothing TODO
+    if type(self._data) != xr.Dataset:
+      data = self._data.getData()
+      method = 'once' # internal flag to switch method.  "once" is generally faster, but "split" can be parallelized.
+      arrs = {}
+      for v,var in enumerate(self.vars):
+        if type(data[0,v]) == float:
+          arrs[var] = xr.DataArray(data[:,v],
+                                   dims=['sample'],
+                                   coords={'sample':range(len(self._data))},
+                                   name=var) # THIS is very fast
+        elif type(data[0,v]) == xr.DataArray:
+          # ONCE #
+          if method == 'once':
+            val = dict((i,data[i,v]) for i in range(len(self._data)))
+            val = xr.Dataset(data_vars=val)
+            val = val.to_array(dim='sample')
+          # SPLIT # currently unused, but could be for parallel performance
+          elif method == 'split':
+            chunk = 150
+            start = 0
+            N = len(self._data)
+            vals = []
+            while start < N-1:
+              stop = min(start+chunk+1,N)
+              ival = dict((i,data[i,v]) for i in range(start,stop))
+              ival = xr.Dataset(data_vars=ival)
+              ival = ival.to_array(dim='sample')
+              vals.append(ival)
+              start = stop
+            val = xr.concat(vals,dim='sample')
+          # END #
+          arrs[var] = val.rename(var) # NOTE rename returns a new DataArray, so assign the result
+        else:
+          raise IOError('Unrecognized data type for var "{}": "{}"'.format(var,type(data[0,v])))
+      # FIXME currently MAKING not APPENDING!  This needs to be fixed.
+      self._data = xr.Dataset(arrs)
+    return self._data
+
+  def toNetCDF4(self,fname,**kwargs):
+    """
+      Writes this data object to file in netCDF4.
+      @ In, fname, str, path/name to write file
+      @ In, kwargs, dict, optional, keywords to pass to netCDF4 writing
+      @ Out, None
+    """
+    self.raiseADebug(' ... collecting dataset ...')
+    self.asDataset()
+    self.raiseADebug(' ... writing to file ...')
+    self._data.to_netcdf(fname,**kwargs)
+
+  ### OLD API ###
diff --git a/framework/Models/Model.py b/framework/Models/Model.py
index 99c6b1a041..6620f343da 100644
--- a/framework/Models/Model.py
+++ b/framework/Models/Model.py
@@ -74,7 +74,7 @@ class cls.
     #the possible inputs
     validateDict['Input'].append(testDict.copy())
     validateDict['Input'  ][0]['class'       ] = 'DataObjects'
-    validateDict['Input'  ][0]['type'        ] = ['PointSet','HistorySet']
+    validateDict['Input'  ][0]['type'        ] = ['DataSet']
     validateDict['Input'  ][0]['required'    ] = False
     validateDict['Input'  ][0]['multiplicity'] = 'n'
     validateDict['Input'].append(testDict.copy())
@@ -86,7 +86,7 @@ class cls.
#the possible outputs validateDict['Output'].append(testDict.copy()) validateDict['Output' ][0]['class' ] = 'DataObjects' - validateDict['Output' ][0]['type' ] = ['PointSet','HistorySet'] + validateDict['Output' ][0]['type' ] = ['DataSet'] validateDict['Output' ][0]['required' ] = False validateDict['Output' ][0]['multiplicity'] = 'n' validateDict['Output'].append(testDict.copy()) diff --git a/framework/utils/InputData.py b/framework/utils/InputData.py index 6d2a302b90..379476b276 100644 --- a/framework/utils/InputData.py +++ b/framework/utils/InputData.py @@ -81,6 +81,10 @@ def convert(cls, value): """ return value +# +# +# +# class StringType(InputType): """ A type for arbitrary string data. @@ -89,6 +93,10 @@ class StringType(InputType): StringType.createClass("string","xsd:string") +# +# +# +# class IntegerType(InputType): """ A type for integer data. @@ -105,6 +113,10 @@ def convert(cls, value): IntegerType.createClass("integer","xsd:integer") +# +# +# +# class FloatType(InputType): """ A type for floating point data. @@ -121,6 +133,10 @@ def convert(cls, value): FloatType.createClass("float","xsd:double") +# +# +# +# class EnumBaseType(InputType): """ A type that allows a set list of strings @@ -160,6 +176,10 @@ def generateXML(cls, xsdNode): enumNode = ET.SubElement(restriction, 'xsd:enumeration') enumNode.set('value',enum) +# +# +# +# class BoolType(EnumBaseType): """ A type that allows True or False @@ -168,6 +188,10 @@ class BoolType(EnumBaseType): BoolType.createClass("bool","boolType",["True","False"]) +# +# +# +# class Quantity: """ A class that allows the quantity of a node to be specified. @@ -178,7 +202,10 @@ class Quantity: one = (1,1) one_to_infinity = (1,2) - +# +# +# +# class ParameterInput(object): """ This class is for a node for inputing parameters diff --git a/framework/utils/cached_ndarray.py b/framework/utils/cached_ndarray.py index b1209ffc84..df11317a4e 100644 --- a/framework/utils/cached_ndarray.py +++ b/framework/utils/cached_ndarray.py @@ -22,10 +22,13 @@ warnings.simplefilter('default',DeprecationWarning) #----- end python 2 - 3 compatibility #External Modules------------------------------------------------------------------------------------ -from numpy import ndarray -import numpy as np import sys import threading +from numpy import ndarray +import numpy as np +import blist +import xarray as xr +import pandas as pd lock = threading.Lock() #External Modules End-------------------------------------------------------------------------------- @@ -213,3 +216,146 @@ def __repr__(self): @ Out, __repr__, string, the representation string """ return repr(self.values[:self.size]) + +# +# +# +# +class cNDarray(object): + """ + Higher-dimension caching of numpy arrays. Might include c1darray as a subset if designed right. + + DEV NOTE: + When redesigning the DataObjects in RAVEN in 2017, we tried a wide variety of libraries, strategies, + and data structures. For appending one realization (with N entities) at a time, the np.ndarray proved + most efficient for dropping in values, particularly when cached as per this class. Restructuring the data + into a more useful form (e.g. xarray.Dataset) should be accomplished in the DataObject; this is just a collecting + structure. - talbpw, 2017-10-20 + """ + ### CONSTRUCTOR ### + def __init__(self,values=None,width=None,length=None,dtype=float,buff=None,offset=0,strides=None,order=None): + """ + Constructor. 
+ @ In, values, np.ndarray, optional, matrix of initial values with shape (# samples, # entities) + @ In, width, int, optional, if not using "values" then this is the number of entities to allocate + @ In, length, int, optional, if not using "values" then this is the initial capacity (number of samples) to allocate + @ In, dtype, type, optional, sets the type of the content of the array + @ In, buff, int, optional, buffer size + @ In, offset, int, optional, array offeset + @ In, strides, object, optional, strides (see docs for np.ndarray) + @ In, order, string, optional, array ordering (fortran, c, etc) (see docs for np.ndarray) + @ Out, None + """ + # members of this class + self.values = None # underlying data for this structure, np.ndarray with optional dtype (default float) + self.size = None # number of rows (samples) with actual data (not including empty cached) + self.width = None # number of entities aka columns + self.capacity = None # cached np.ndarray size + # priorities: initialize with values; if not, use width and length + if values is not None: + if type(values) != np.ndarray: + raise IOError('Only np.ndarray can be used to set "values" in "cNDarray". Got '+type(values).__name__) + self.values = values # underlying data structure + self.size = values.shape[0] + self.width = values.shape[1] + # if setting by value, initialize capacity to existing data length + self.capacity = self.size + else: + if width is None: + raise IOError('Creating cNDarray: neither "values" nor "width" was specified!') + self.capacity = length if length is not None else 100 + self.width = width + self.size = 0 + self.values = ndarray((self.capacity,self.width),dtype,buff,offset,strides,order) + + ### PROPERTIES ### + @property + def shape(self): + """ + Shape property, as used in np.ndarray structures. + @ In, None + @ Out, (int,int), the (#rows, #columns) of useful data in this cached array + """ + return (self.size,self.width) + + ### BUILTINS ### + def __array__(self, dtype = None): + """ + so that numpy's array() returns values + @ In, dtype, np.type, the requested type of the array + @ Out, __array__, numpy.ndarray, the requested array + """ + if dtype != None: + return ndarray((self.size,self.width), dtype, buffer=None, offset=0, strides=None, order=None) + else: + return self.getData() + + def __getitem__(self,val): + """ + Get item method. Slicing should work as expected. + @ In, val, slice object, the slicing object (e.g. 1, :, :2, 1:3, etc.) + @ Out, __getitem__, np.ndarray, the element(s) + """ + return self.values[:self.size].__getitem__(val) + + def __iter__(self): + """ + Overload of iterator + @ In, None + @ Out, __iter__, iterator, iterator + """ + return self.values[:self.size].__iter__() + + def __len__(self): + """ + Return size, which is the number of samples, independent of entities, containing useful data. + Does not include cached entries that have not yet been filled. + @ In, None + @ Out, __len__, integer, size + """ + return self.size + + def __repr__(self): + """ + overload of __repr__ function + @ In, None + @ Out, __repr__, string, the representation string + """ + return repr(self.values[:self.size]) + + ### UTILITY FUNCTIONS ### + def append(self,entry): + """ + Append method. call format c1darrayInstance.append(value) + @ In, entry, np.ndarray, the entries to append as [entry, entry, entry]. Must have shape (x, # entities), where x can be any nonzero number of samples. 
+ @ Out, None + """ + # TODO extend to include sending in a (width,) shape np.ndarray to append a single sample, rather than have it forced to be a 1-entry array. + # entry.shape[0] is the number of new entries, entry.shape[1] is the number of variables being entered + # entry must match width and be at least 1 entry long + if type(entry) not in [np.ndarray]: + raise IOError('Tried to add new data to cNDarray. Can only accept np.ndarray, but got '+type(entry).__name__) + # for now require full correct shape, later handle the single entry case + if len(entry.shape)!=2: + # TODO single entry case + raise IOError('Tried to add new data to cNDarray. Need shape (#,{}) but got "{}"!'.format(self.width,entry.shape)) + # must have matching width (fix for single entry case) + if entry.shape[1] != self.width: + raise IOError('Tried to add new data to cNDarray. Need {} entities per entry, but got '.format(self.width)+str(entry.shape[1])) + # check if there's enough space in cache to append the new entries + if self.size + entry.shape[0] > self.capacity: + # since there's not enough space, quadruple available space # TODO change growth parameter to be variable? + self.capacity += max(self.capacity*4,entry.shape[0]) + newdata = np.zeros((self.capacity,self.width),dtype=self.values.dtype) + newdata[:self.size] = self.values[:self.size] + self.values = newdata + self.values[self.size:self.size+entry.shape[0]][:] = entry[:] + self.size += entry.shape[0] + + def getData(self): + """ + Returns the underlying data structure. + @ In, None + @ Out, getData, np.ndarray, underlying data up to the used size + """ + return self.values[:self.size] diff --git a/scripts/TestHarness/testers/RavenUtils.py b/scripts/TestHarness/testers/RavenUtils.py index 45c51905f6..6e0742964e 100644 --- a/scripts/TestHarness/testers/RavenUtils.py +++ b/scripts/TestHarness/testers/RavenUtils.py @@ -31,11 +31,12 @@ def inPython3(): #This list is made of (module, how to check the version, minimum version, # quality assurance module version, maximum version) -modules_to_try = [("numpy",'numpy.version.version',"1.8.0","1.11.0",None), - ("h5py",'h5py.__version__','2.4.0','2.6.0',None), - ("scipy",'scipy.__version__',"0.14.0","0.17.1",None), - ("sklearn",'sklearn.__version__',"0.16.1","0.17.1",None), - ("matplotlib",'matplotlib.__version__',"1.3.1","1.5.3",None)] +modules_to_try = [("numpy" ,'numpy.version.version' ,"1.8.0" , "1.11.0", None ), + ("h5py" ,'h5py.__version__' ,'2.4.0' , '2.6.0' , None ), + ("scipy" ,'scipy.__version__' ,"0.14.0", "0.17.1", None ), + ("sklearn" ,'sklearn.__version__' ,"0.16.1", "0.17.1", None ), + ("matplotlib",'matplotlib.__version__',"1.3.1" , "1.5.3" , None ), + ("xarray" ,'xarray.__version__' ,"0.9.5" , "0.9.5" , None )] def __lookUpPreferredVersion(name): """ @@ -43,29 +44,28 @@ def __lookUpPreferredVersion(name): @In, name, string, the name of the module @Out, result, string, returns the version as a string or "" if unknown """ - for i,fv,ev,qa,mv in modules_to_try: + for i,fv,ev,qa,mv in modules_to_try: if name == i: return qa return "" -__condaList = [("numpy",__lookUpPreferredVersion("numpy")), - ("h5py",__lookUpPreferredVersion("h5py")), - ("scipy",__lookUpPreferredVersion("scipy")), - ("scikit-learn",__lookUpPreferredVersion("sklearn")), - ("matplotlib",__lookUpPreferredVersion("matplotlib")), - ("pyside",""), - ("python","2.7"), - ("hdf5",""), - ("swig",""), - ("pylint",""), - ("coverage",""), - ("lxml","")] +# some names are different between the Module and the Pip names +__moduleNameToPipName = 
{'sklearn':'scikit-learn'} + +__pipList = [] +__condaList = [] +for entry in modules_to_try: + version = __lookUpPreferredVersion(entry[0]) + __condaList.append((__moduleNameToPipName.get(entry[0],entry[0]),version)) + __pipList.append((__moduleNameToPipName.get(entry[0],entry[0]),version)) -__pipList = [("numpy",__lookUpPreferredVersion("numpy")), - ("h5py",__lookUpPreferredVersion("h5py")), - ("scipy",__lookUpPreferredVersion("scipy")), - ("scikit-learn",__lookUpPreferredVersion("sklearn")), - ("matplotlib",__lookUpPreferredVersion("matplotlib"))] +__condaList += [("python" , "2.7"), + ("pyside" , ""), + ("hdf5" , ""), + ("swig" , ""), + ("pylint" , ""), + ("coverage", ""), + ("lxml" , "")] def moduleReport(module,version=''): """Checks if the module exists. diff --git a/tests/framework/tests b/tests/framework/tests index 3c09b07db5..78f7620853 100644 --- a/tests/framework/tests +++ b/tests/framework/tests @@ -264,6 +264,11 @@ requires_swig2 = True [../] + [./test_datasets] + type = 'RavenPython' + input = '../../framework/DataObjects/TestDataSets.py' + [../] + [./test_xsd_input_data] type = 'RavenPython' input = 'TestXSD/TestDataRead.py' diff --git a/tests/framework/utils/testCachedNDArray.py b/tests/framework/utils/testCachedNDArray.py index 12f4d7d903..ecd63ec719 100644 --- a/tests/framework/utils/testCachedNDArray.py +++ b/tests/framework/utils/testCachedNDArray.py @@ -22,7 +22,9 @@ warnings.simplefilter('default',DeprecationWarning) import os,sys +import copy import numpy as np +import xarray as xr frameworkDir = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),os.pardir,os.pardir,os.pardir,'framework')) sys.path.append(frameworkDir) @@ -33,6 +35,25 @@ results = {"pass":0,"fail":0} +def checkSame(comment,value,expected,updateResults=True): + """ + This method is aimed to compare two floats given a certain tolerance + @ In, comment, string, a comment printed out if it fails + @ In, value, float, the value to compare + @ In, expected, float, the expected value + @ In, updateResults, bool, optional, if True updates global results + @ Out, None + """ + if value != expected: + print("checking answer",comment,'|',value,"!=",expected) + if updateResults: + results["fail"] += 1 + return False + else: + if updateResults: + results["pass"] += 1 + return True + def checkAnswer(comment,value,expected,tol=1e-10,updateResults=True): """ This method is aimed to compare two floats given a certain tolerance @@ -44,7 +65,7 @@ def checkAnswer(comment,value,expected,tol=1e-10,updateResults=True): @ Out, None """ if abs(value - expected) > tol: - print("checking answer",comment,value,"!=",expected) + print("checking answer",comment,'|',value,"!=",expected) if updateResults: results["fail"] += 1 return False @@ -54,6 +75,10 @@ def checkAnswer(comment,value,expected,tol=1e-10,updateResults=True): return True +################## +# 1D Array Tests # +################## + #establish test array origin = np.array([-3.14,2.99792,2.718,8.987,0.618]) #test init @@ -98,6 +123,140 @@ def checkAnswer(comment,value,expected,tol=1e-10,updateResults=True): print('checking string representation does not match:\n'+msg,'\n!=\n'+right) results['fail']+=1 +################## +# ND Array Tests # +################## + +## POINT SET ## + +# default construction +testArray = cached_ndarray.cNDarray(width=3,length=10) +checkAnswer('initial capacity',testArray.capacity,10) +checkAnswer('initial width',testArray.shape[1],3) +checkAnswer('initial size',testArray.size,0) +checkAnswer('initial 
len',len(testArray),0) + +#get empty +checkAnswer('getData empty size',testArray.getData().size,0) + +#append entry +vals = np.array([[1.0,2.0,3.0]]) +testArray.append(vals) +#check values +aValues = testArray.getData() +for v,val in enumerate(vals): + checkAnswer('appended[{}]'.format(v),aValues[0,v],vals[0,v]) + +#iter +for aValues in testArray: + for v,val in enumerate(vals): + checkAnswer('iter[{}]'.format(v),aValues[v],vals[0,v]) + +# append more +vals = [0,0] +vals[0] = [11.0,12.0,13.0] +testArray.append(np.array([vals[0]])) +vals[1] = [21.0,22.0,23.0] +testArray.append(np.array([vals[1]])) +#test slicing +for a,ar in enumerate(testArray[1:]): + for i in range(3): + checkAnswer('slicing [{},{}]'.format(a,i),ar[i],vals[a][i]) + +# construction via values +values = np.array( + [[ 1.0, 2.0, 3.0], + [11.0, 12.0, 13.0], + [21.0, 22.0, 23.0]] + ) +testArray = cached_ndarray.cNDarray(values=values) +for i in range(values.shape[0]): + for j in range(values.shape[1]): + checkAnswer('initialize by value: [{},{}]'.format(i,j),values[i][j],testArray.values[i][j]) + + +## ND SET ## + +#default construction, __init__() +testArray = cached_ndarray.cNDarray(width=3,length=10,dtype=object) +checkAnswer('initial capacity',testArray.capacity,10) +checkAnswer('initial width' ,testArray.width,3) +checkAnswer('initial size' ,testArray.size,0) + +#append entry, append() +vals = np.array([[1.0, + xr.DataArray([ 2.0, 2.1, 2.2],dims=['time'],coords={'time':[1e-6,2e-6,3e-6]}), + xr.DataArray([[ 3.00, 3.01, 3.02],[ 3.10, 3.11, 3.12]],dims=['space','time'],coords={'space':[1e-3,2e-3],'time':[1e-6,2e-6,3e-6]}) + ]],dtype=object) +testArray.append(vals) +checkAnswer('ND append, point',testArray.values[0,0],1.0) +checkAnswer('ND append, hist, time 0',testArray.values[0,1][0],2.0) +checkAnswer('ND append, nd, time 0, location 0',testArray.values[0,2][0,0], 3.00) + +# shape, shape() (property) +checkAnswer('ND shape 0' ,testArray.shape[0],1) +checkAnswer('ND shape 1' ,testArray.shape[1],3) + +# as array, __array__() +b = np.asarray(testArray) +checkAnswer('ND as array, point',b[0,0] ,1.0) +checkAnswer('ND as array, hist' ,b[0,1][0] ,2.0) +checkAnswer('ND as array, nd' ,b[0,2][0,0],3.0) + +# __getitem__() +checkAnswer('ND getitem, point',testArray[0,0],1.0) +checkAnswer('ND getitem, hist, time 0',testArray[0,1][0],2.0) +checkAnswer('ND getitem, nd, time 0, location 0',testArray[0,2][0,0], 3.00) + +# __iter__() +for i,entry in enumerate(testArray): + checkAnswer('ND iter, point',entry[0] , 1.0) + checkAnswer('ND iter, hist,',entry[1][0] , 2.0) + checkAnswer('ND iter, nd' ,entry[2][0,0], 3.0) + +# repr, __repr__() +string = repr(testArray).replace('\n','') +correct = 'array([[1.0, array([ 2. , 2.1, 2.2])Coordinates: * time (time) float64 1e-06 2e-06 3e-06, array([[ 3. 
, 3.01, 3.02], [ 3.1 , 3.11, 3.12]])Coordinates: * time (time) float64 1e-06 2e-06 3e-06 * space (space) float64 0.001 0.002]], dtype=object)' +checkSame('ND repr',string==correct,True) + +# append a few more for testing purposes +for i in range(3): + testArray.append(vals) + +# __len__() +checkAnswer('ND len',len(testArray),4) + + +#values construction +values = np.ndarray([3,3],dtype=object) +values[0,0] = 1.0 +values[1,0] = 11.0 +values[2,0] = 21.0 + +values[0,1] = xr.DataArray([ 2.0, 2.1, 2.2],dims=['time'],coords={'time':[1e-6,2e-6,3e-6]}) +values[1,1] = xr.DataArray([12.0,12.1,12.2],dims=['time'],coords={'time':[1e-6,2e-6,3e-6]}) +values[2,1] = xr.DataArray([22.0,22.1,22.2],dims=['time'],coords={'time':[1e-6,2e-6,3e-6]}) + +values[0,2] = xr.DataArray([[ 3.00, 3.01, 3.02],[ 3.10, 3.11, 3.12]],dims=['space','time'],coords={'space':[1e-3,2e-3],'time':[1e-6,2e-6,3e-6]}) +values[1,2] = xr.DataArray([[13.00,13.01,13.02],[13.10,13.11,13.12]],dims=['space','time'],coords={'space':[1e-3,2e-3],'time':[1e-6,2e-6,3e-6]}) +values[2,2] = xr.DataArray([[23.00,23.01,23.02],[23.10,23.11,23.12]],dims=['space','time'],coords={'space':[1e-3,2e-3],'time':[1e-6,2e-6,3e-6]}) + +testArray = cached_ndarray.cNDarray(values=values) +checkAnswer('ND by value, point, sample 0',testArray.values[0,0],1.0) +checkAnswer('ND by value, point, sample 1',testArray.values[1,0],11.0) +checkAnswer('ND by value, point, sample 2',testArray.values[2,0],21.0) + +checkAnswer('ND by value, hist, sample 0, time 0 (access index)',testArray.values[0,1][0],2.0) +checkAnswer('ND by value, hist, sample 0, time 0 (access label)',testArray.values[0,1].loc[dict(time=1e-6)],2.0) +checkAnswer('ND by value, hist, sample 1, time 1',testArray.values[1,1][1],12.1) +checkAnswer('ND by value, hist, sample 2, time 2',testArray.values[2,1][2],22.2) + +checkAnswer('ND by value, nd, sample 0, time 0, location 0 (access index)',testArray.values[0,2][0,0], 3.00) +checkAnswer('ND by value, nd, sample 0, time 0, location 0 (access label)',testArray.values[0,2].loc[dict(time=1e-6,space=1e-3)], 3.00) +checkAnswer('ND by value, nd, sample 1, time 0, location 1',testArray.values[1,2][0,1],13.01) +checkAnswer('ND by value, nd, sample 2, time 1, location 2',testArray.values[2,2][1,2],23.12) + + print(results) sys.exit(results["fail"])