From 1264e700747f71ee12435a6294da44155b939442 Mon Sep 17 00:00:00 2001 From: richardarsenault <39815445+richardarsenault@users.noreply.github.com> Date: Tue, 24 Oct 2023 19:22:41 -0400 Subject: [PATCH 1/7] Added sensitivity analysis notebook --- docs/notebooks/Sensitivity_analysis.ipynb | 442 ++++++++++++++++++++++ 1 file changed, 442 insertions(+) create mode 100644 docs/notebooks/Sensitivity_analysis.ipynb diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb new file mode 100644 index 00000000..0bb32cbe --- /dev/null +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Performing a sensitivity analysis\n", + "\n", + "In this notebook, we perform a sensitivity analysis on GR4JCN to determine the importance of each parameter using the Sobol' sensitivity analysis method. The example shown herein is done using very few parameter samples, and as such, results will be poor and should not be interpreted as-is. However, it is possible to use this code locally using RavenPy to run a much larger sampling on a local computer. \n", + "\n", + "We must first start by installing the SALib python package in the PAVICS-Hydro environment as it is currently not installed. In future updates, this next cell will be removed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: SALib in /opt/conda/envs/birdy/lib/python3.9/site-packages (1.4.7)\n", + "Requirement already satisfied: matplotlib>=3.2.2 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (3.7.1)\n", + "Requirement already satisfied: multiprocess in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (0.70.14)\n", + "Requirement already satisfied: numpy>=1.20.3 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (1.23.5)\n", + "Requirement already satisfied: pandas>=1.1.2 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (1.3.5)\n", + "Requirement already satisfied: scipy>=1.7.3 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (1.9.1)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (1.0.7)\n", + "Requirement already satisfied: cycler>=0.10 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (0.11.0)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (4.39.4)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (1.4.4)\n", + "Requirement already satisfied: packaging>=20.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (23.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (9.4.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (3.0.9)\n", + "Requirement already satisfied: python-dateutil>=2.7 in 
/opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (2.8.2)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (5.12.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from pandas>=1.1.2->SALib) (2023.3)\n", + "Requirement already satisfied: dill>=0.3.6 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from multiprocess->SALib) (0.3.6)\n", + "Requirement already satisfied: zipp>=3.1.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from importlib-resources>=3.2.0->matplotlib>=3.2.2->SALib) (3.15.0)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=3.2.2->SALib) (1.16.0)\n" + ] + } + ], + "source": [ + "# This package is not yet installed by default in all environments, so let's add it manually\n", + "!pip install --no-input SALib" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare data for GR4JCN\n", + "\n", + "We will use GR4JCN for this analysis. Since the sensitivity analysis acts on a model response to different inputs, we must find a metric that can be used to measure the impacts of parameters on the model response. In this case, we will use the Nash-Sutcliffe and Absolute Error metrics as responses. It could be any scalar value: mean flow, peak flow, lowest flow, flow volume, etc. But for this exercice we suppose that we want to know the impact of a paramter set on an objective function value. 
We therefore use a dataset that contains observed streamflow to compute the evaluation metrics.\n", + "\n", + "Let's now import the required packages, get the correct data and setup the model hru for physiographic information:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Import required packages:\n", + "import tempfile\n", + "\n", + "import numpy as np\n", + "import datetime as dt\n", + "\n", + "from pathlib import Path\n", + "\n", + "from ravenpy import OutputReader\n", + "from ravenpy.ravenpy import run\n", + "from ravenpy.config import commands as rc\n", + "from ravenpy.config.emulators import GR4JCN\n", + "from ravenpy.utilities.testdata import get_file\n", + "\n", + "from SALib.sample import sobol as sobol_sampler\n", + "from SALib.analyze import sobol as sobol_analyzer\n", + "\n", + "# We get the netCDF from a server. You can replace the getfile method by a string containing the path to your own netCDF\n", + "nc_file = get_file(\n", + " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", + ")\n", + "\n", + "# Here, we need to give the name of your different dataset in order to match with Raven models.\n", + "alt_names = {\n", + " \"RAINFALL\": \"rain\",\n", + " \"SNOWFALL\": \"snow\",\n", + " \"TEMP_MIN\": \"tmin\",\n", + " \"TEMP_MAX\": \"tmax\",\n", + " \"PET\": \"pet\",\n", + " \"HYDROGRAPH\": \"qobs\",\n", + "}\n", + "\n", + "# The HRU of your watershed\n", + "hru = dict(area=4250.6, elevation=843.0, latitude=54.4848, longitude=-123.3659)\n", + "\n", + "# The evaluation metrics. Multiple options are possible, as can be found in Tutorial Notebook 06. 
Here we use Nash-Sutcliffe and Absolute Error.\n", + "eval_metrics = (\"NASH_SUTCLIFFE\",\"ABSERR\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sensitivity analysis step 1: Define the Sobol problem to analyze\n", + "\n", + "Sobol sensitivity analysis requires three distinct steps:\n", + "\n", + "1- Sample parameter sets from the possible parameter space;\n", + "\n", + "2- Run the model and gather the model response for each of these parameter spaces;\n", + "\n", + "3- Analyze the change in model responses as a function of changes in parameter sets.\n", + "\n", + "Therefore, the first step is to sample the parameter space, as done below:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of parameter sets to evaluate is 56\n" + ] + } + ], + "source": [ + "# Define the model inputs:\n", + "problem = {\n", + " 'num_vars': 6, # Number of variables\n", + " 'names': ['x1', 'x2', 'x3', 'x4', 'CN1', 'CN2' ], # Names of these variables, to make it easier to follow. Can be any string defined by the user\n", + " 'bounds': [\n", + " [0.01, 2.5], # We must provide lower and upper bounds for each parameter to sample. Must be adjusted for each model.\n", + " [-15.0, 10.0],\n", + " [10.0, 700.0],\n", + " [0.0, 7.0],\n", + " [1.0, 30.0],\n", + " [0.0, 1.0],\n", + " ]\n", + "}\n", + "\n", + "# Generate samples. The number of parameter sets to generate will be N * (2D + 2), where N is defined below and D is the number of\n", + "# model inputs (6 for GR4JCN).\n", + "N = 4\n", + "param_values = sobol_sampler.sample(problem, N)\n", + "\n", + "# Display the size of the param_values matrix. We will run the model with each set of parameters, i.e. 
one per row.\n", + "print('The number of parameter sets to evaluate is ' + str(param_values.shape[0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sensitivity analysis step 2: Run the model for each parameter set\n", + "\n", + "In this stage, we have our sampled parameter sets according to the Sobol / Saltelli sampling methods. We now need to run the GR4JCN model for each of these parameter sets and compute the objective function (model response) that we want. Here we ask the model to pre-compute two objective functions (NSE and MAE), so we will be able to perform the sensitivity analysis on both metrics while only running the model once for each parameter set.\n", + "\n", + "We use a simple loop to run the model here, but advanced users could parallelize this as it is an \"embarassingly parallelizable\" problem. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 out of 56 runs performed. Please wait.\n", + "2 out of 56 runs performed. Please wait.\n", + "3 out of 56 runs performed. Please wait.\n", + "4 out of 56 runs performed. Please wait.\n", + "5 out of 56 runs performed. Please wait.\n", + "6 out of 56 runs performed. Please wait.\n", + "7 out of 56 runs performed. Please wait.\n", + "8 out of 56 runs performed. Please wait.\n", + "9 out of 56 runs performed. Please wait.\n", + "10 out of 56 runs performed. Please wait.\n", + "11 out of 56 runs performed. Please wait.\n", + "12 out of 56 runs performed. Please wait.\n", + "13 out of 56 runs performed. Please wait.\n", + "14 out of 56 runs performed. Please wait.\n", + "15 out of 56 runs performed. Please wait.\n", + "16 out of 56 runs performed. Please wait.\n", + "17 out of 56 runs performed. Please wait.\n", + "18 out of 56 runs performed. Please wait.\n", + "19 out of 56 runs performed. Please wait.\n", + "20 out of 56 runs performed. 
Please wait.\n", + "21 out of 56 runs performed. Please wait.\n", + "22 out of 56 runs performed. Please wait.\n", + "23 out of 56 runs performed. Please wait.\n", + "24 out of 56 runs performed. Please wait.\n", + "25 out of 56 runs performed. Please wait.\n", + "26 out of 56 runs performed. Please wait.\n", + "27 out of 56 runs performed. Please wait.\n", + "28 out of 56 runs performed. Please wait.\n", + "29 out of 56 runs performed. Please wait.\n", + "30 out of 56 runs performed. Please wait.\n", + "31 out of 56 runs performed. Please wait.\n", + "32 out of 56 runs performed. Please wait.\n", + "33 out of 56 runs performed. Please wait.\n", + "34 out of 56 runs performed. Please wait.\n", + "35 out of 56 runs performed. Please wait.\n", + "36 out of 56 runs performed. Please wait.\n", + "37 out of 56 runs performed. Please wait.\n", + "38 out of 56 runs performed. Please wait.\n", + "39 out of 56 runs performed. Please wait.\n", + "40 out of 56 runs performed. Please wait.\n", + "41 out of 56 runs performed. Please wait.\n", + "42 out of 56 runs performed. Please wait.\n", + "43 out of 56 runs performed. Please wait.\n", + "44 out of 56 runs performed. Please wait.\n", + "45 out of 56 runs performed. Please wait.\n", + "46 out of 56 runs performed. Please wait.\n", + "47 out of 56 runs performed. Please wait.\n", + "48 out of 56 runs performed. Please wait.\n", + "49 out of 56 runs performed. Please wait.\n", + "50 out of 56 runs performed. Please wait.\n", + "51 out of 56 runs performed. Please wait.\n", + "52 out of 56 runs performed. Please wait.\n", + "53 out of 56 runs performed. Please wait.\n", + "54 out of 56 runs performed. Please wait.\n", + "55 out of 56 runs performed. Please wait.\n", + "56 out of 56 runs performed. 
Please wait.\n" + ] + } + ], + "source": [ + "# Set the working directory at one place so all files are overwritten at the same place (Avoids creating hundreds or thousands\n", + "# of folders with each run's data)\n", + "workdir = Path(tempfile.mkdtemp())\n", + "\n", + "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with \n", + "# two objective functions (NSE and AbsErr). Let's pre-define both vectors now.\n", + "Y_NSE = np.zeros([param_values.shape[0]])\n", + "Y_ABS = np.zeros([param_values.shape[0]])\n", + "\n", + "# Define a run name for files\n", + "run_name = \"SA_Sobol\"\n", + " \n", + "# Now we have a loop that runs the model iteratively, once per parameter set:\n", + "for i, X in enumerate(param_values):\n", + " \n", + " # We need to create the desired model with its parameters the same way as in the Notebook 04_Emulating_hydrological_models.\n", + " m = GR4JCN(\n", + " ObservationData=[rc.ObservationData.from_nc(nc_file, alt_names=\"qobs\")],\n", + " Gauge=[\n", + " rc.Gauge.from_nc(\n", + " nc_file,\n", + " alt_names=alt_names,\n", + " data_kwds={\"ALL\": {\"elevation\": hru[\"elevation\"]}},\n", + " )\n", + " ],\n", + " HRUs=[hru],\n", + " StartDate=dt.datetime(1990, 1, 1),\n", + " EndDate=dt.datetime(1999, 12, 31),\n", + " RunName=run_name,\n", + " EvaluationMetrics=eval_metrics, # We add this code to tell Raven which objective function we want to pass.\n", + " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", + " params=X.tolist(), # Here is where we pass the paramter sets to the model, from the loop enumerator X.\n", + " )\n", + " \n", + " # Write the files to disk, and overwrite existing files in the folder (we already got the values we needed from previous runs)\n", + " m.write_rv(workdir=workdir, overwrite=True)\n", + " \n", + " # Run the model and get the path 
to the outputs folder that can be used in the output reader.\n", + " outputs_path = run(modelname=run_name, configdir=workdir)\n", + "\n", + " # Get the outputs using the Output Reader object.\n", + " outputs = OutputReader(run_name=run_name, path=outputs_path)\n", + "\n", + " # Gather the results for both of the desired objective functions. We will see how the choice of objective function impacts sensitivity.\n", + " Y_NSE[i]=outputs.diagnostics['DIAG_NASH_SUTCLIFFE'][0]\n", + " Y_ABS[i]=outputs.diagnostics['DIAG_ABSERR'][0]\n", + " \n", + " # Print the status of the hydrological modelling runs.\n", + " print(str(i+1) + ' out of ' + str(param_values.shape[0]) + ' runs performed. Please wait.')\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sensitivity analysis step 3: Analyze results and obtain parameter sensitivity indices\n", + "\n", + "At this point, we have a model response for each of the parameter sets. We can analyze the results using the code below. We will display only the total and 1st order sensitivities." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ST ST_conf\n", + "x1 0.006127 0.241219\n", + "x2 1.087096 1.608638\n", + "x3 1.063538 1.089994\n", + "x4 0.001802 0.203849\n", + "CN1 0.000080 0.000552\n", + "CN2 0.000065 0.016032\n", + " S1 S1_conf\n", + "x1 0.010003 3.412904\n", + "x2 0.182283 10.485992\n", + "x3 0.186033 2.459215\n", + "x4 -0.005261 1.103930\n", + "CN1 0.001258 0.111335\n", + "CN2 -0.001790 0.911684\n", + " S2 S2_conf\n", + "(x1, x2) -0.004795 10.685271\n", + "(x1, x3) -0.006084 8.776468\n", + "(x1, x4) 0.019018 8.274103\n", + "(x1, CN1) 0.019154 8.153521\n", + "(x1, CN2) 0.019095 7.946130\n", + "(x2, x3) -0.172427 18.482753\n", + "(x2, x4) -0.189946 17.563659\n", + "(x2, CN1) -0.186289 17.640564\n", + "(x2, CN2) -0.186383 17.311035\n", + "(x3, x4) -0.182434 4.134092\n", + "(x3, CN1) -0.182830 4.125834\n", + "(x3, CN2) -0.182634 4.045257\n", + "(x4, CN1) 0.013961 1.956480\n", + "(x4, CN2) 0.013853 1.928884\n", + "(CN1, CN2) -0.000724 0.125317\n", + "[ 0.01000257 0.18228334 0.18603266 -0.00526059 0.00125772 -0.00179038]\n" + ] + } + ], + "source": [ + "# Perform analysis for the NSE objective function first\n", + "Si = sobol_analyzer.analyze(problem, Y_NSE, print_to_console=True)\n", + "\n", + "# Print the first-order sensitivity indices\n", + "print(Si['S1'])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ST ST_conf\n", + "x1 0.007228 0.254758\n", + "x2 1.070896 1.026428\n", + "x3 0.703326 0.702454\n", + "x4 0.000368 0.246604\n", + "CN1 0.000040 0.011486\n", + "CN2 0.000127 0.074282\n", + " S1 S1_conf\n", + "x1 0.013155 7.089881\n", + "x2 0.210238 10.132335\n", + "x3 0.151394 3.690693\n", + "x4 -0.005328 4.115447\n", + "CN1 -0.000279 0.893015\n", + "CN2 -0.001991 2.273377\n", + " S2 
S2_conf\n", + "(x1, x2) -0.013023 13.159437\n", + "(x1, x3) 0.005835 12.760746\n", + "(x1, x4) 0.049807 13.022190\n", + "(x1, CN1) 0.049349 13.009315\n", + "(x1, CN2) 0.049013 13.001959\n", + "(x2, x3) -0.169721 33.500692\n", + "(x2, x4) 0.085993 32.564271\n", + "(x2, CN1) 0.082556 33.496530\n", + "(x2, CN2) 0.082515 33.044689\n", + "(x3, x4) -0.184240 16.448204\n", + "(x3, CN1) -0.184525 16.433511\n", + "(x3, CN2) -0.184737 16.392434\n", + "(x4, CN1) 0.009591 5.355330\n", + "(x4, CN2) 0.009536 5.293132\n", + "(CN1, CN2) 0.002064 0.760966\n", + "[ 0.01315518 0.21023796 0.1513938 -0.00532847 -0.00027912 -0.00199112]\n" + ] + } + ], + "source": [ + "# Now perform the sensitivity analysis for the Absolute Error objective function\n", + "Si = sobol_analyzer.analyze(problem, Y_ABS, print_to_console=True)\n", + "\n", + "# Print the first-order sensitivity indices\n", + "print(Si['S1'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Result analysis\n", + "\n", + "We can see that parameters x2 and x3 are more sensitive than the other with total (ST) and 1st order (S1) sensitivities higher than the other parameters. This is true for both objective functions, but could also be different for other metrics, so it is important to keep this in mind when using a sensitivity analysis to determine parameter importance! A common example is a parameter related to snowmelt. This parameter will have no impact if there is no snow during the period used in the model, but would become critical if there were to be snow in following years.\n", + "\n", + "Note that the tables above present the sobol sensitivity in the left column and the confidence interval (95%) in the right column. Values are strange because we are using way too few parameter sets to adequately sample the parameter space here, but increasing the value of \"N\" to 1024 or 2048 would allow for a much better estimation for a 6-parameter model." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From be999ec5c6b3de28de2ef15ab904b90f19792dd2 Mon Sep 17 00:00:00 2001 From: richardarsenault <39815445+richardarsenault@users.noreply.github.com> Date: Tue, 24 Oct 2023 19:24:42 -0400 Subject: [PATCH 2/7] Update index.rst Added sensitivity_analysis notebook to index --- docs/notebooks/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/notebooks/index.rst b/docs/notebooks/index.rst index d4a1c264..7c6978da 100644 --- a/docs/notebooks/index.rst +++ b/docs/notebooks/index.rst @@ -41,6 +41,7 @@ Advanced workflows Managing_Jupyter_Environments Perform_Regionalization Running_HMETS_with_CANOPEX_dataset + Sensitivity_analysis time_series_analysis From ee62990dbcb823e06beecfa9559fb54d440e8091 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 23:25:34 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/notebooks/Sensitivity_analysis.ipynb | 78 +++++++++++++---------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index 0bb32cbe..08434878 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -64,23 +64,22 @@ "metadata": {}, "outputs": [], "source": [ + "import datetime as dt\n", + "\n", "# Import required packages:\n", "import tempfile\n", + "from pathlib import Path\n", "\n", "import numpy as np\n", - "import 
datetime as dt\n", - "\n", - "from pathlib import Path\n", + "from SALib.analyze import sobol as sobol_analyzer\n", + "from SALib.sample import sobol as sobol_sampler\n", "\n", "from ravenpy import OutputReader\n", - "from ravenpy.ravenpy import run\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config.emulators import GR4JCN\n", + "from ravenpy.ravenpy import run\n", "from ravenpy.utilities.testdata import get_file\n", "\n", - "from SALib.sample import sobol as sobol_sampler\n", - "from SALib.analyze import sobol as sobol_analyzer\n", - "\n", "# We get the netCDF from a server. You can replace the getfile method by a string containing the path to your own netCDF\n", "nc_file = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", @@ -100,7 +99,7 @@ "hru = dict(area=4250.6, elevation=843.0, latitude=54.4848, longitude=-123.3659)\n", "\n", "# The evaluation metrics. Multiple options are possible, as can be found in Tutorial Notebook 06. Here we use Nash-Sutcliffe and Absolute Error.\n", - "eval_metrics = (\"NASH_SUTCLIFFE\",\"ABSERR\")" + "eval_metrics = (\"NASH_SUTCLIFFE\", \"ABSERR\")" ] }, { @@ -136,16 +135,26 @@ "source": [ "# Define the model inputs:\n", "problem = {\n", - " 'num_vars': 6, # Number of variables\n", - " 'names': ['x1', 'x2', 'x3', 'x4', 'CN1', 'CN2' ], # Names of these variables, to make it easier to follow. Can be any string defined by the user\n", - " 'bounds': [\n", - " [0.01, 2.5], # We must provide lower and upper bounds for each parameter to sample. Must be adjusted for each model.\n", - " [-15.0, 10.0],\n", - " [10.0, 700.0],\n", - " [0.0, 7.0],\n", - " [1.0, 30.0],\n", - " [0.0, 1.0],\n", - " ]\n", + " \"num_vars\": 6, # Number of variables\n", + " \"names\": [\n", + " \"x1\",\n", + " \"x2\",\n", + " \"x3\",\n", + " \"x4\",\n", + " \"CN1\",\n", + " \"CN2\",\n", + " ], # Names of these variables, to make it easier to follow. 
Can be any string defined by the user\n", + " \"bounds\": [\n", + " [\n", + " 0.01,\n", + " 2.5,\n", + " ], # We must provide lower and upper bounds for each parameter to sample. Must be adjusted for each model.\n", + " [-15.0, 10.0],\n", + " [10.0, 700.0],\n", + " [0.0, 7.0],\n", + " [1.0, 30.0],\n", + " [0.0, 1.0],\n", + " ],\n", "}\n", "\n", "# Generate samples. The number of parameter sets to generate will be N * (2D + 2), where N is defined below and D is the number of\n", @@ -154,7 +163,7 @@ "param_values = sobol_sampler.sample(problem, N)\n", "\n", "# Display the size of the param_values matrix. We will run the model with each set of parameters, i.e. one per row.\n", - "print('The number of parameter sets to evaluate is ' + str(param_values.shape[0]))" + "print(\"The number of parameter sets to evaluate is \" + str(param_values.shape[0]))" ] }, { @@ -243,17 +252,16 @@ "# of folders with each run's data)\n", "workdir = Path(tempfile.mkdtemp())\n", "\n", - "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with \n", + "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with\n", "# two objective functions (NSE and AbsErr). 
Let's pre-define both vectors now.\n", "Y_NSE = np.zeros([param_values.shape[0]])\n", "Y_ABS = np.zeros([param_values.shape[0]])\n", "\n", "# Define a run name for files\n", "run_name = \"SA_Sobol\"\n", - " \n", + "\n", "# Now we have a loop that runs the model iteratively, once per parameter set:\n", "for i, X in enumerate(param_values):\n", - " \n", " # We need to create the desired model with its parameters the same way as in the Notebook 04_Emulating_hydrological_models.\n", " m = GR4JCN(\n", " ObservationData=[rc.ObservationData.from_nc(nc_file, alt_names=\"qobs\")],\n", @@ -269,13 +277,13 @@ " EndDate=dt.datetime(1999, 12, 31),\n", " RunName=run_name,\n", " EvaluationMetrics=eval_metrics, # We add this code to tell Raven which objective function we want to pass.\n", - " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", - " params=X.tolist(), # Here is where we pass the paramter sets to the model, from the loop enumerator X.\n", + " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", + " params=X.tolist(), # Here is where we pass the paramter sets to the model, from the loop enumerator X.\n", " )\n", - " \n", + "\n", " # Write the files to disk, and overwrite existing files in the folder (we already got the values we needed from previous runs)\n", " m.write_rv(workdir=workdir, overwrite=True)\n", - " \n", + "\n", " # Run the model and get the path to the outputs folder that can be used in the output reader.\n", " outputs_path = run(modelname=run_name, configdir=workdir)\n", "\n", @@ -283,12 +291,16 @@ " outputs = OutputReader(run_name=run_name, path=outputs_path)\n", "\n", " # Gather the results for both of the desired objective functions. 
We will see how the choice of objective function impacts sensitivity.\n", - " Y_NSE[i]=outputs.diagnostics['DIAG_NASH_SUTCLIFFE'][0]\n", - " Y_ABS[i]=outputs.diagnostics['DIAG_ABSERR'][0]\n", - " \n", + " Y_NSE[i] = outputs.diagnostics[\"DIAG_NASH_SUTCLIFFE\"][0]\n", + " Y_ABS[i] = outputs.diagnostics[\"DIAG_ABSERR\"][0]\n", + "\n", " # Print the status of the hydrological modelling runs.\n", - " print(str(i+1) + ' out of ' + str(param_values.shape[0]) + ' runs performed. Please wait.')\n", - " " + " print(\n", + " str(i + 1)\n", + " + \" out of \"\n", + " + str(param_values.shape[0])\n", + " + \" runs performed. Please wait.\"\n", + " )" ] }, { @@ -350,7 +362,7 @@ "Si = sobol_analyzer.analyze(problem, Y_NSE, print_to_console=True)\n", "\n", "# Print the first-order sensitivity indices\n", - "print(Si['S1'])" + "print(Si[\"S1\"])" ] }, { @@ -403,7 +415,7 @@ "Si = sobol_analyzer.analyze(problem, Y_ABS, print_to_console=True)\n", "\n", "# Print the first-order sensitivity indices\n", - "print(Si['S1'])" + "print(Si[\"S1\"])" ] }, { From 538b06b14172264184f4b0798937132631837f2e Mon Sep 17 00:00:00 2001 From: richardarsenault <39815445+richardarsenault@users.noreply.github.com> Date: Tue, 12 Dec 2023 13:54:23 -0500 Subject: [PATCH 4/7] Added Sensitivity analysis NB Added SA notebook, with the hypothesis that SALib is now installed. --- docs/notebooks/Sensitivity_analysis.ipynb | 289 ++++++++++------------ 1 file changed, 128 insertions(+), 161 deletions(-) diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index 08434878..0f8b5656 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -6,45 +6,7 @@ "source": [ "# Performing a sensitivity analysis\n", "\n", - "In this notebook, we perform a sensitivity analysis on GR4JCN to determine the importance of each parameter using the Sobol' sensitivity analysis method. 
The example shown herein is done using very few parameter samples, and as such, results will be poor and should not be interpreted as-is. However, it is possible to use this code locally using RavenPy to run a much larger sampling on a local computer. \n", - "\n", - "We must first start by installing the SALib python package in the PAVICS-Hydro environment as it is currently not installed. In future updates, this next cell will be removed." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: SALib in /opt/conda/envs/birdy/lib/python3.9/site-packages (1.4.7)\n", - "Requirement already satisfied: matplotlib>=3.2.2 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (3.7.1)\n", - "Requirement already satisfied: multiprocess in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (0.70.14)\n", - "Requirement already satisfied: numpy>=1.20.3 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (1.23.5)\n", - "Requirement already satisfied: pandas>=1.1.2 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (1.3.5)\n", - "Requirement already satisfied: scipy>=1.7.3 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from SALib) (1.9.1)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (1.0.7)\n", - "Requirement already satisfied: cycler>=0.10 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (4.39.4)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (1.4.4)\n", - "Requirement already satisfied: packaging>=20.0 in 
/opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (23.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (9.4.0)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (3.0.9)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (2.8.2)\n", - "Requirement already satisfied: importlib-resources>=3.2.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from matplotlib>=3.2.2->SALib) (5.12.0)\n", - "Requirement already satisfied: pytz>=2017.3 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from pandas>=1.1.2->SALib) (2023.3)\n", - "Requirement already satisfied: dill>=0.3.6 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from multiprocess->SALib) (0.3.6)\n", - "Requirement already satisfied: zipp>=3.1.0 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from importlib-resources>=3.2.0->matplotlib>=3.2.2->SALib) (3.15.0)\n", - "Requirement already satisfied: six>=1.5 in /opt/conda/envs/birdy/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib>=3.2.2->SALib) (1.16.0)\n" - ] - } - ], - "source": [ - "# This package is not yet installed by default in all environments, so let's add it manually\n", - "!pip install --no-input SALib" + "In this notebook, we perform a sensitivity analysis on GR4JCN to determine the importance of each parameter using the Sobol' sensitivity analysis method. The example shown herein is done using very few parameter samples, and as such, results will be poor and should not be interpreted as-is. However, it is possible to use this code locally using RavenPy to run a much larger sampling on a local computer. 
" ] }, { @@ -60,26 +22,27 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import datetime as dt\n", - "\n", "# Import required packages:\n", "import tempfile\n", - "from pathlib import Path\n", "\n", "import numpy as np\n", - "from SALib.analyze import sobol as sobol_analyzer\n", - "from SALib.sample import sobol as sobol_sampler\n", + "import datetime as dt\n", + "\n", + "from pathlib import Path\n", "\n", "from ravenpy import OutputReader\n", + "from ravenpy.ravenpy import run\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config.emulators import GR4JCN\n", - "from ravenpy.ravenpy import run\n", "from ravenpy.utilities.testdata import get_file\n", "\n", + "from SALib.sample import sobol as sobol_sampler\n", + "from SALib.analyze import sobol as sobol_analyzer\n", + "\n", "# We get the netCDF from a server. You can replace the getfile method by a string containing the path to your own netCDF\n", "nc_file = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", @@ -99,7 +62,16 @@ "hru = dict(area=4250.6, elevation=843.0, latitude=54.4848, longitude=-123.3659)\n", "\n", "# The evaluation metrics. Multiple options are possible, as can be found in Tutorial Notebook 06. 
Here we use Nash-Sutcliffe and Absolute Error.\n", - "eval_metrics = (\"NASH_SUTCLIFFE\", \"ABSERR\")" + "eval_metrics = (\"NASH_SUTCLIFFE\",\"ABSERR\")\n", + "\n", + "# Data keywords for meteorological data stations\n", + "data_kwds = {\n", + " \"ALL\": { \n", + " \"elevation\": hru[\"elevation\"], #extract the values directly from the \"hru\" we previously built\n", + " \"latitude\": hru[\"latitude\"],\n", + " \"longitude\": hru[\"longitude\"],\n", + " }\n", + "}\n" ] }, { @@ -121,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -135,26 +107,16 @@ "source": [ "# Define the model inputs:\n", "problem = {\n", - " \"num_vars\": 6, # Number of variables\n", - " \"names\": [\n", - " \"x1\",\n", - " \"x2\",\n", - " \"x3\",\n", - " \"x4\",\n", - " \"CN1\",\n", - " \"CN2\",\n", - " ], # Names of these variables, to make it easier to follow. Can be any string defined by the user\n", - " \"bounds\": [\n", - " [\n", - " 0.01,\n", - " 2.5,\n", - " ], # We must provide lower and upper bounds for each parameter to sample. Must be adjusted for each model.\n", - " [-15.0, 10.0],\n", - " [10.0, 700.0],\n", - " [0.0, 7.0],\n", - " [1.0, 30.0],\n", - " [0.0, 1.0],\n", - " ],\n", + " 'num_vars': 6, # Number of variables\n", + " 'names': ['x1', 'x2', 'x3', 'x4', 'CN1', 'CN2' ], # Names of these variables, to make it easier to follow. Can be any string defined by the user\n", + " 'bounds': [\n", + " [0.01, 2.5], # We must provide lower and upper bounds for each parameter to sample. Must be adjusted for each model.\n", + " [-15.0, 10.0],\n", + " [10.0, 700.0],\n", + " [0.0, 7.0],\n", + " [1.0, 30.0],\n", + " [0.0, 1.0],\n", + " ]\n", "}\n", "\n", "# Generate samples. The number of parameter sets to generate will be N * (2D + 2), where N is defined below and D is the number of\n", @@ -163,7 +125,7 @@ "param_values = sobol_sampler.sample(problem, N)\n", "\n", "# Display the size of the param_values matrix. 
We will run the model with each set of parameters, i.e. one per row.\n", - "print(\"The number of parameter sets to evaluate is \" + str(param_values.shape[0]))" + "print('The number of parameter sets to evaluate is ' + str(param_values.shape[0]))" ] }, { @@ -179,11 +141,21 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": { "tags": [] }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/birdy/lib/python3.9/site-packages/xclim/indices/fire/_cffwis.py:207: NumbaDeprecationWarning: \u001b[1mThe 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.\u001b[0m\n", + " def _day_length(lat: int | float, mth: int): # pragma: no cover\n", + "/opt/conda/envs/birdy/lib/python3.9/site-packages/xclim/indices/fire/_cffwis.py:227: NumbaDeprecationWarning: \u001b[1mThe 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.\u001b[0m\n", + " def _day_length_factor(lat: float, mth: int): # pragma: no cover\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -252,7 +224,7 @@ "# of folders with each run's data)\n", "workdir = Path(tempfile.mkdtemp())\n", "\n", - "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with\n", + "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). 
We will test SA with \n", "# two objective functions (NSE and AbsErr). Let's pre-define both vectors now.\n", "Y_NSE = np.zeros([param_values.shape[0]])\n", "Y_ABS = np.zeros([param_values.shape[0]])\n", @@ -260,30 +232,29 @@ "# Define a run name for files\n", "run_name = \"SA_Sobol\"\n", "\n", + "config = dict(ObservationData=[rc.ObservationData.from_nc(nc_file, alt_names=\"qobs\")],\n", + " Gauge=[rc.Gauge.from_nc(nc_file, alt_names=alt_names, data_kwds=data_kwds)],\n", + " HRUs=[hru],\n", + " StartDate=dt.datetime(1990, 1, 1),\n", + " EndDate=dt.datetime(1999, 12, 31),\n", + " RunName=run_name,\n", + " EvaluationMetrics=eval_metrics, # We add this code to tell Raven which objective function we want to pass.\n", + " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", + " )\n", + "\n", + "\n", "# Now we have a loop that runs the model iteratively, once per parameter set:\n", "for i, X in enumerate(param_values):\n", + " \n", " # We need to create the desired model with its parameters the same way as in the Notebook 04_Emulating_hydrological_models.\n", " m = GR4JCN(\n", - " ObservationData=[rc.ObservationData.from_nc(nc_file, alt_names=\"qobs\")],\n", - " Gauge=[\n", - " rc.Gauge.from_nc(\n", - " nc_file,\n", - " alt_names=alt_names,\n", - " data_kwds={\"ALL\": {\"elevation\": hru[\"elevation\"]}},\n", - " )\n", - " ],\n", - " HRUs=[hru],\n", - " StartDate=dt.datetime(1990, 1, 1),\n", - " EndDate=dt.datetime(1999, 12, 31),\n", - " RunName=run_name,\n", - " EvaluationMetrics=eval_metrics, # We add this code to tell Raven which objective function we want to pass.\n", - " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", - " params=X.tolist(), # Here is where we pass the paramter sets to the model, from the loop enumerator X.\n", + " params=X.tolist(), # Here is where we pass the 
paramter sets to the model, from the loop enumerator X.\n", + " **config,\n", " )\n", - "\n", + " \n", " # Write the files to disk, and overwrite existing files in the folder (we already got the values we needed from previous runs)\n", " m.write_rv(workdir=workdir, overwrite=True)\n", - "\n", + " \n", " # Run the model and get the path to the outputs folder that can be used in the output reader.\n", " outputs_path = run(modelname=run_name, configdir=workdir)\n", "\n", @@ -291,16 +262,12 @@ " outputs = OutputReader(run_name=run_name, path=outputs_path)\n", "\n", " # Gather the results for both of the desired objective functions. We will see how the choice of objective function impacts sensitivity.\n", - " Y_NSE[i] = outputs.diagnostics[\"DIAG_NASH_SUTCLIFFE\"][0]\n", - " Y_ABS[i] = outputs.diagnostics[\"DIAG_ABSERR\"][0]\n", - "\n", + " Y_NSE[i]=outputs.diagnostics['DIAG_NASH_SUTCLIFFE'][0]\n", + " Y_ABS[i]=outputs.diagnostics['DIAG_ABSERR'][0]\n", + " \n", " # Print the status of the hydrological modelling runs.\n", - " print(\n", - " str(i + 1)\n", - " + \" out of \"\n", - " + str(param_values.shape[0])\n", - " + \" runs performed. Please wait.\"\n", - " )" + " print(str(i+1) + ' out of ' + str(param_values.shape[0]) + ' runs performed. 
Please wait.')\n", + " " ] }, { @@ -314,7 +281,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": { "tags": [] }, @@ -324,36 +291,36 @@ "output_type": "stream", "text": [ " ST ST_conf\n", - "x1 0.006127 0.241219\n", - "x2 1.087096 1.608638\n", - "x3 1.063538 1.089994\n", - "x4 0.001802 0.203849\n", - "CN1 0.000080 0.000552\n", - "CN2 0.000065 0.016032\n", - " S1 S1_conf\n", - "x1 0.010003 3.412904\n", - "x2 0.182283 10.485992\n", - "x3 0.186033 2.459215\n", - "x4 -0.005261 1.103930\n", - "CN1 0.001258 0.111335\n", - "CN2 -0.001790 0.911684\n", - " S2 S2_conf\n", - "(x1, x2) -0.004795 10.685271\n", - "(x1, x3) -0.006084 8.776468\n", - "(x1, x4) 0.019018 8.274103\n", - "(x1, CN1) 0.019154 8.153521\n", - "(x1, CN2) 0.019095 7.946130\n", - "(x2, x3) -0.172427 18.482753\n", - "(x2, x4) -0.189946 17.563659\n", - "(x2, CN1) -0.186289 17.640564\n", - "(x2, CN2) -0.186383 17.311035\n", - "(x3, x4) -0.182434 4.134092\n", - "(x3, CN1) -0.182830 4.125834\n", - "(x3, CN2) -0.182634 4.045257\n", - "(x4, CN1) 0.013961 1.956480\n", - "(x4, CN2) 0.013853 1.928884\n", - "(CN1, CN2) -0.000724 0.125317\n", - "[ 0.01000257 0.18228334 0.18603266 -0.00526059 0.00125772 -0.00179038]\n" + "x1 0.379755 0.412470\n", + "x2 3.951079 4.277464\n", + "x3 0.817404 0.706167\n", + "x4 0.018082 0.042282\n", + "CN1 0.001306 0.014531\n", + "CN2 0.039689 0.050031\n", + " S1 S1_conf\n", + "x1 0.758170 1.225126\n", + "x2 -2.602254 3.146394\n", + "x3 -1.267410 1.067809\n", + "x4 0.154981 0.570345\n", + "CN1 0.047907 0.252695\n", + "CN2 0.269812 0.565875\n", + " S2 S2_conf\n", + "(x1, x2) 32.689367 38.520285\n", + "(x1, x3) 17.576873 20.907644\n", + "(x1, x4) 4.280166 5.520463\n", + "(x1, CN1) 5.621395 7.093809\n", + "(x1, CN2) 3.630504 4.881342\n", + "(x2, x3) 5.204038 3.476739\n", + "(x2, x4) 3.943612 3.646842\n", + "(x2, CN1) 3.985238 3.786346\n", + "(x2, CN2) 3.768349 4.080534\n", + "(x3, x4) 2.068058 1.709529\n", + "(x3, CN1) 2.032927 2.056435\n", + "(x3, CN2) 
1.937827 1.429686\n", + "(x4, CN1) -0.166094 0.681523\n", + "(x4, CN2) -0.174896 0.600854\n", + "(CN1, CN2) -0.061728 0.257433\n", + "[ 0.75816978 -2.60225424 -1.26740975 0.15498148 0.04790724 0.26981225]\n" ] } ], @@ -362,12 +329,12 @@ "Si = sobol_analyzer.analyze(problem, Y_NSE, print_to_console=True)\n", "\n", "# Print the first-order sensitivity indices\n", - "print(Si[\"S1\"])" + "print(Si['S1'])" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": { "tags": [] }, @@ -377,36 +344,36 @@ "output_type": "stream", "text": [ " ST ST_conf\n", - "x1 0.007228 0.254758\n", - "x2 1.070896 1.026428\n", - "x3 0.703326 0.702454\n", - "x4 0.000368 0.246604\n", - "CN1 0.000040 0.011486\n", - "CN2 0.000127 0.074282\n", - " S1 S1_conf\n", - "x1 0.013155 7.089881\n", - "x2 0.210238 10.132335\n", - "x3 0.151394 3.690693\n", - "x4 -0.005328 4.115447\n", - "CN1 -0.000279 0.893015\n", - "CN2 -0.001991 2.273377\n", - " S2 S2_conf\n", - "(x1, x2) -0.013023 13.159437\n", - "(x1, x3) 0.005835 12.760746\n", - "(x1, x4) 0.049807 13.022190\n", - "(x1, CN1) 0.049349 13.009315\n", - "(x1, CN2) 0.049013 13.001959\n", - "(x2, x3) -0.169721 33.500692\n", - "(x2, x4) 0.085993 32.564271\n", - "(x2, CN1) 0.082556 33.496530\n", - "(x2, CN2) 0.082515 33.044689\n", - "(x3, x4) -0.184240 16.448204\n", - "(x3, CN1) -0.184525 16.433511\n", - "(x3, CN2) -0.184737 16.392434\n", - "(x4, CN1) 0.009591 5.355330\n", - "(x4, CN2) 0.009536 5.293132\n", - "(CN1, CN2) 0.002064 0.760966\n", - "[ 0.01315518 0.21023796 0.1513938 -0.00532847 -0.00027912 -0.00199112]\n" + "x1 0.074128 2.832662\n", + "x2 0.592907 9.442068\n", + "x3 0.062538 1.799786\n", + "x4 0.001134 0.052102\n", + "CN1 0.000129 0.005306\n", + "CN2 0.045190 2.191904\n", + " S1 S1_conf\n", + "x1 -0.041123 1.257697\n", + "x2 0.880993 2.098604\n", + "x3 -0.060452 1.121361\n", + "x4 -0.003788 0.176675\n", + "CN1 -0.002432 0.053261\n", + "CN2 0.068965 1.111757\n", + " S2 S2_conf\n", + "(x1, x2) 0.152554 9.004543\n", + 
"(x1, x3) 0.051138 4.004951\n", + "(x1, x4) -0.007737 0.657525\n", + "(x1, CN1) -0.000284 0.382640\n", + "(x1, CN2) -0.072163 4.178101\n", + "(x2, x3) -0.255076 7.009165\n", + "(x2, x4) -0.230651 0.886730\n", + "(x2, CN1) -0.236764 0.262212\n", + "(x2, CN2) -0.223305 7.083388\n", + "(x3, x4) 0.337321 0.782419\n", + "(x3, CN1) 0.336869 0.866348\n", + "(x3, CN2) 0.303673 1.816981\n", + "(x4, CN1) 0.001250 0.127584\n", + "(x4, CN2) -0.006689 0.466249\n", + "(CN1, CN2) -0.003203 0.122958\n", + "[-0.04112282 0.88099262 -0.06045177 -0.00378769 -0.00243184 0.06896523]\n" ] } ], @@ -415,7 +382,7 @@ "Si = sobol_analyzer.analyze(problem, Y_ABS, print_to_console=True)\n", "\n", "# Print the first-order sensitivity indices\n", - "print(Si[\"S1\"])" + "print(Si['S1'])" ] }, { From 240f73ce29d47e8fe3c3ebccc412d54fc4c89018 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Dec 2023 18:54:44 +0000 Subject: [PATCH 5/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/notebooks/Sensitivity_analysis.ipynb | 100 ++++++++++++---------- 1 file changed, 57 insertions(+), 43 deletions(-) diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index 0f8b5656..8d656fa6 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -27,22 +27,20 @@ "outputs": [], "source": [ "# Import required packages:\n", + "import datetime as dt\n", "import tempfile\n", + "from pathlib import Path\n", "\n", "import numpy as np\n", - "import datetime as dt\n", - "\n", - "from pathlib import Path\n", + "from SALib.analyze import sobol as sobol_analyzer\n", + "from SALib.sample import sobol as sobol_sampler\n", "\n", "from ravenpy import OutputReader\n", - "from ravenpy.ravenpy import run\n", "from ravenpy.config import commands as rc\n", "from ravenpy.config.emulators import GR4JCN\n", + "from 
ravenpy.ravenpy import run\n", "from ravenpy.utilities.testdata import get_file\n", "\n", - "from SALib.sample import sobol as sobol_sampler\n", - "from SALib.analyze import sobol as sobol_analyzer\n", - "\n", "# We get the netCDF from a server. You can replace the getfile method by a string containing the path to your own netCDF\n", "nc_file = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", @@ -62,16 +60,18 @@ "hru = dict(area=4250.6, elevation=843.0, latitude=54.4848, longitude=-123.3659)\n", "\n", "# The evaluation metrics. Multiple options are possible, as can be found in Tutorial Notebook 06. Here we use Nash-Sutcliffe and Absolute Error.\n", - "eval_metrics = (\"NASH_SUTCLIFFE\",\"ABSERR\")\n", + "eval_metrics = (\"NASH_SUTCLIFFE\", \"ABSERR\")\n", "\n", "# Data keywords for meteorological data stations\n", "data_kwds = {\n", - " \"ALL\": { \n", - " \"elevation\": hru[\"elevation\"], #extract the values directly from the \"hru\" we previously built\n", + " \"ALL\": {\n", + " \"elevation\": hru[\n", + " \"elevation\"\n", + " ], # extract the values directly from the \"hru\" we previously built\n", " \"latitude\": hru[\"latitude\"],\n", " \"longitude\": hru[\"longitude\"],\n", " }\n", - "}\n" + "}" ] }, { @@ -107,16 +107,26 @@ "source": [ "# Define the model inputs:\n", "problem = {\n", - " 'num_vars': 6, # Number of variables\n", - " 'names': ['x1', 'x2', 'x3', 'x4', 'CN1', 'CN2' ], # Names of these variables, to make it easier to follow. Can be any string defined by the user\n", - " 'bounds': [\n", - " [0.01, 2.5], # We must provide lower and upper bounds for each parameter to sample. 
Must be adjusted for each model.\n", - " [-15.0, 10.0],\n", - " [10.0, 700.0],\n", - " [0.0, 7.0],\n", - " [1.0, 30.0],\n", - " [0.0, 1.0],\n", - " ]\n", + " \"num_vars\": 6, # Number of variables\n", + " \"names\": [\n", + " \"x1\",\n", + " \"x2\",\n", + " \"x3\",\n", + " \"x4\",\n", + " \"CN1\",\n", + " \"CN2\",\n", + " ], # Names of these variables, to make it easier to follow. Can be any string defined by the user\n", + " \"bounds\": [\n", + " [\n", + " 0.01,\n", + " 2.5,\n", + " ], # We must provide lower and upper bounds for each parameter to sample. Must be adjusted for each model.\n", + " [-15.0, 10.0],\n", + " [10.0, 700.0],\n", + " [0.0, 7.0],\n", + " [1.0, 30.0],\n", + " [0.0, 1.0],\n", + " ],\n", "}\n", "\n", "# Generate samples. The number of parameter sets to generate will be N * (2D + 2), where N is defined below and D is the number of\n", @@ -125,7 +135,7 @@ "param_values = sobol_sampler.sample(problem, N)\n", "\n", "# Display the size of the param_values matrix. We will run the model with each set of parameters, i.e. one per row.\n", - "print('The number of parameter sets to evaluate is ' + str(param_values.shape[0]))" + "print(\"The number of parameter sets to evaluate is \" + str(param_values.shape[0]))" ] }, { @@ -224,7 +234,7 @@ "# of folders with each run's data)\n", "workdir = Path(tempfile.mkdtemp())\n", "\n", - "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with \n", + "# Pre-define the results matrix based on the number of parameters we will test (and thus how many runs we will need to do). We will test SA with\n", "# two objective functions (NSE and AbsErr). 
Let's pre-define both vectors now.\n", "Y_NSE = np.zeros([param_values.shape[0]])\n", "Y_ABS = np.zeros([param_values.shape[0]])\n", @@ -232,29 +242,29 @@ "# Define a run name for files\n", "run_name = \"SA_Sobol\"\n", "\n", - "config = dict(ObservationData=[rc.ObservationData.from_nc(nc_file, alt_names=\"qobs\")],\n", - " Gauge=[rc.Gauge.from_nc(nc_file, alt_names=alt_names, data_kwds=data_kwds)],\n", - " HRUs=[hru],\n", - " StartDate=dt.datetime(1990, 1, 1),\n", - " EndDate=dt.datetime(1999, 12, 31),\n", - " RunName=run_name,\n", - " EvaluationMetrics=eval_metrics, # We add this code to tell Raven which objective function we want to pass.\n", - " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", - " )\n", + "config = dict(\n", + " ObservationData=[rc.ObservationData.from_nc(nc_file, alt_names=\"qobs\")],\n", + " Gauge=[rc.Gauge.from_nc(nc_file, alt_names=alt_names, data_kwds=data_kwds)],\n", + " HRUs=[hru],\n", + " StartDate=dt.datetime(1990, 1, 1),\n", + " EndDate=dt.datetime(1999, 12, 31),\n", + " RunName=run_name,\n", + " EvaluationMetrics=eval_metrics, # We add this code to tell Raven which objective function we want to pass.\n", + " SuppressOutput=True, # This suppresses the writing of files to disk, returning only basic information such as the evaluation metrics values.\n", + ")\n", "\n", "\n", "# Now we have a loop that runs the model iteratively, once per parameter set:\n", "for i, X in enumerate(param_values):\n", - " \n", " # We need to create the desired model with its parameters the same way as in the Notebook 04_Emulating_hydrological_models.\n", " m = GR4JCN(\n", - " params=X.tolist(), # Here is where we pass the paramter sets to the model, from the loop enumerator X.\n", + " params=X.tolist(), # Here is where we pass the paramter sets to the model, from the loop enumerator X.\n", " **config,\n", " )\n", - " \n", + "\n", " # Write the files to disk, and 
overwrite existing files in the folder (we already got the values we needed from previous runs)\n", " m.write_rv(workdir=workdir, overwrite=True)\n", - " \n", + "\n", " # Run the model and get the path to the outputs folder that can be used in the output reader.\n", " outputs_path = run(modelname=run_name, configdir=workdir)\n", "\n", @@ -262,12 +272,16 @@ " outputs = OutputReader(run_name=run_name, path=outputs_path)\n", "\n", " # Gather the results for both of the desired objective functions. We will see how the choice of objective function impacts sensitivity.\n", - " Y_NSE[i]=outputs.diagnostics['DIAG_NASH_SUTCLIFFE'][0]\n", - " Y_ABS[i]=outputs.diagnostics['DIAG_ABSERR'][0]\n", - " \n", + " Y_NSE[i] = outputs.diagnostics[\"DIAG_NASH_SUTCLIFFE\"][0]\n", + " Y_ABS[i] = outputs.diagnostics[\"DIAG_ABSERR\"][0]\n", + "\n", " # Print the status of the hydrological modelling runs.\n", - " print(str(i+1) + ' out of ' + str(param_values.shape[0]) + ' runs performed. Please wait.')\n", - " " + " print(\n", + " str(i + 1)\n", + " + \" out of \"\n", + " + str(param_values.shape[0])\n", + " + \" runs performed. Please wait.\"\n", + " )" ] }, { @@ -329,7 +343,7 @@ "Si = sobol_analyzer.analyze(problem, Y_NSE, print_to_console=True)\n", "\n", "# Print the first-order sensitivity indices\n", - "print(Si['S1'])" + "print(Si[\"S1\"])" ] }, { @@ -382,7 +396,7 @@ "Si = sobol_analyzer.analyze(problem, Y_ABS, print_to_console=True)\n", "\n", "# Print the first-order sensitivity indices\n", - "print(Si['S1'])" + "print(Si[\"S1\"])" ] }, { From 69f03e3cf557251d0e0d85bb5728240f4bbe89bc Mon Sep 17 00:00:00 2001 From: David Huard Date: Tue, 12 Dec 2023 15:28:53 -0500 Subject: [PATCH 6/7] Add progress bar. Fix a typo. 
--- docs/notebooks/Sensitivity_analysis.ipynb | 236 +++++++++------------- 1 file changed, 92 insertions(+), 144 deletions(-) diff --git a/docs/notebooks/Sensitivity_analysis.ipynb b/docs/notebooks/Sensitivity_analysis.ipynb index 8d656fa6..30091420 100644 --- a/docs/notebooks/Sensitivity_analysis.ipynb +++ b/docs/notebooks/Sensitivity_analysis.ipynb @@ -15,15 +15,17 @@ "source": [ "## Prepare data for GR4JCN\n", "\n", - "We will use GR4JCN for this analysis. Since the sensitivity analysis acts on a model response to different inputs, we must find a metric that can be used to measure the impacts of parameters on the model response. In this case, we will use the Nash-Sutcliffe and Absolute Error metrics as responses. It could be any scalar value: mean flow, peak flow, lowest flow, flow volume, etc. But for this exercice we suppose that we want to know the impact of a paramter set on an objective function value. We therefore use a dataset that contains observed streamflow to compute the evaluation metrics.\n", + "We will use GR4JCN for this analysis. Since the sensitivity analysis acts on a model response to different inputs, we must find a metric that can be used to measure the impacts of parameters on the model response. In this case, we will use the Nash-Sutcliffe and Absolute Error metrics as responses. It could be any scalar value: mean flow, peak flow, lowest flow, flow volume, etc. But for this exercise we suppose that we want to know the impact of a parameter set on an objective function value. We therefore use a dataset that contains observed streamflow to compute the evaluation metrics.\n", \n", - "Let's now import the required packages, get the correct data and setup the model hru for physiographic information:" + "Let's now import the required packages, get the correct data and set up the model HRU for physiographic information."
] }, { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Import required packages:\n", @@ -34,6 +36,7 @@ "import numpy as np\n", "from SALib.analyze import sobol as sobol_analyzer\n", "from SALib.sample import sobol as sobol_sampler\n", + "from tqdm.notebook import tqdm\n", "\n", "from ravenpy import OutputReader\n", "from ravenpy.config import commands as rc\n", @@ -41,7 +44,7 @@ "from ravenpy.ravenpy import run\n", "from ravenpy.utilities.testdata import get_file\n", "\n", - "# We get the netCDF from a server. You can replace the getfile method by a string containing the path to your own netCDF\n", + "# We get the netCDF from a server. You can replace the `get_file` function by a string containing the path to your own netCDF.\n", "nc_file = get_file(\n", " \"raven-gr4j-cemaneige/Salmon-River-Near-Prince-George_meteo_daily.nc\"\n", ")\n", @@ -82,19 +85,19 @@ "\n", "Sobol sensitivity analysis requires three distinct steps:\n", "\n", - "1- Sample parameter sets from the possible parameter space;\n", + "1. Sample parameter sets from the possible parameter space;\n", + "2. Run the model and gather the model response for each of these parameter spaces;\n", + "3. Analyze the change in model responses as a function of changes in parameter sets.\n", "\n", - "2- Run the model and gather the model response for each of these parameter spaces;\n", - "\n", - "3- Analyze the change in model responses as a function of changes in parameter sets.\n", - "\n", - "Therefore, the first step is to sample the parameter space, as done below:" + "Therefore, the first step is to sample the parameter space." ] }, { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -167,66 +170,18 @@ ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "1 out of 56 runs performed. Please wait.\n", - "2 out of 56 runs performed. 
Please wait.\n", - "3 out of 56 runs performed. Please wait.\n", - "4 out of 56 runs performed. Please wait.\n", - "5 out of 56 runs performed. Please wait.\n", - "6 out of 56 runs performed. Please wait.\n", - "7 out of 56 runs performed. Please wait.\n", - "8 out of 56 runs performed. Please wait.\n", - "9 out of 56 runs performed. Please wait.\n", - "10 out of 56 runs performed. Please wait.\n", - "11 out of 56 runs performed. Please wait.\n", - "12 out of 56 runs performed. Please wait.\n", - "13 out of 56 runs performed. Please wait.\n", - "14 out of 56 runs performed. Please wait.\n", - "15 out of 56 runs performed. Please wait.\n", - "16 out of 56 runs performed. Please wait.\n", - "17 out of 56 runs performed. Please wait.\n", - "18 out of 56 runs performed. Please wait.\n", - "19 out of 56 runs performed. Please wait.\n", - "20 out of 56 runs performed. Please wait.\n", - "21 out of 56 runs performed. Please wait.\n", - "22 out of 56 runs performed. Please wait.\n", - "23 out of 56 runs performed. Please wait.\n", - "24 out of 56 runs performed. Please wait.\n", - "25 out of 56 runs performed. Please wait.\n", - "26 out of 56 runs performed. Please wait.\n", - "27 out of 56 runs performed. Please wait.\n", - "28 out of 56 runs performed. Please wait.\n", - "29 out of 56 runs performed. Please wait.\n", - "30 out of 56 runs performed. Please wait.\n", - "31 out of 56 runs performed. Please wait.\n", - "32 out of 56 runs performed. Please wait.\n", - "33 out of 56 runs performed. Please wait.\n", - "34 out of 56 runs performed. Please wait.\n", - "35 out of 56 runs performed. Please wait.\n", - "36 out of 56 runs performed. Please wait.\n", - "37 out of 56 runs performed. Please wait.\n", - "38 out of 56 runs performed. Please wait.\n", - "39 out of 56 runs performed. Please wait.\n", - "40 out of 56 runs performed. Please wait.\n", - "41 out of 56 runs performed. Please wait.\n", - "42 out of 56 runs performed. 
Please wait.\n", - "43 out of 56 runs performed. Please wait.\n", - "44 out of 56 runs performed. Please wait.\n", - "45 out of 56 runs performed. Please wait.\n", - "46 out of 56 runs performed. Please wait.\n", - "47 out of 56 runs performed. Please wait.\n", - "48 out of 56 runs performed. Please wait.\n", - "49 out of 56 runs performed. Please wait.\n", - "50 out of 56 runs performed. Please wait.\n", - "51 out of 56 runs performed. Please wait.\n", - "52 out of 56 runs performed. Please wait.\n", - "53 out of 56 runs performed. Please wait.\n", - "54 out of 56 runs performed. Please wait.\n", - "55 out of 56 runs performed. Please wait.\n", - "56 out of 56 runs performed. Please wait.\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7b7834b5265540868eba214b2d50ffa9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/56 [00:00 Date: Tue, 12 Dec 2023 15:32:31 -0500 Subject: [PATCH 7/7] add salib to doc env --- environment-rtd.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/environment-rtd.yml b/environment-rtd.yml index 663ef0e1..9e0ac9cd 100644 --- a/environment-rtd.yml +++ b/environment-rtd.yml @@ -20,6 +20,7 @@ dependencies: - notebook - pandoc - pydantic >=1.10.8,<2.0 + - salib - seaborn - sphinx - sphinx-autoapi diff --git a/pyproject.toml b/pyproject.toml index d87951cc..ed7e0439 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,6 +104,7 @@ docs = [ "numpydoc", "pandoc", "pymetalink", + "salib", "s3fs", "sphinx", "sphinx-click",