From 75963155c97d15a1ba103134e09be8c712b98b83 Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Tue, 1 Nov 2022 17:28:12 -0600 Subject: [PATCH 1/3] Per #2277, define utility function to error out about python support not being compiled and update point2grid, plot_point_obs, ascii2nc, point_stat, ensemble_stat, and the vx_data2d_factory to call it. --- src/basic/vx_config/config_util.cc | 17 +++++++++++++++++ src/basic/vx_config/config_util.h | 2 ++ src/libcode/vx_data2d_factory/data2d_factory.cc | 5 +---- .../vx_data2d_factory/var_info_factory.cc | 5 +---- src/tools/core/ensemble_stat/ensemble_stat.cc | 9 +++++++-- src/tools/core/point_stat/point_stat.cc | 9 +++++++-- src/tools/other/ascii2nc/ascii2nc.cc | 3 +++ .../other/plot_point_obs/plot_point_obs.cc | 9 +++++++-- src/tools/other/point2grid/point2grid.cc | 8 ++++++-- 9 files changed, 51 insertions(+), 16 deletions(-) diff --git a/src/basic/vx_config/config_util.cc b/src/basic/vx_config/config_util.cc index 80dee7748f..b9b48a1931 100644 --- a/src/basic/vx_config/config_util.cc +++ b/src/basic/vx_config/config_util.cc @@ -3121,3 +3121,20 @@ NormalizeType parse_conf_normalize(Dictionary *dict) { } /////////////////////////////////////////////////////////////////////////////// +// +// Print consistent error message and exit +// +/////////////////////////////////////////////////////////////////////////////// + +void python_compile_error(const char *caller) { + ConcatString cs; + if(caller) cs << caller << " -> "; + + mlog << Error << "\n" << cs + << "Support for Python has not been compiled!\n" + << "To run Python scripts, recompile with the --enable-python option.\n\n"; + + exit(1); +} + +/////////////////////////////////////////////////////////////////////////////// diff --git a/src/basic/vx_config/config_util.h b/src/basic/vx_config/config_util.h index 9debcd2d49..e69e27fb1c 100644 --- a/src/basic/vx_config/config_util.h +++ b/src/basic/vx_config/config_util.h @@ -139,6 +139,8 @@ extern ConcatString wavelettype_to_string(WaveletType); extern int parse_conf_percentile(Dictionary *dict); +extern void python_compile_error(const char *caller=0); + //////////////////////////////////////////////////////////////////////// #endif /* __CONFIG_UTIL_H__ */ diff --git a/src/libcode/vx_data2d_factory/data2d_factory.cc b/src/libcode/vx_data2d_factory/data2d_factory.cc index a2cc01b8af..28d0d355ea 100644 --- a/src/libcode/vx_data2d_factory/data2d_factory.cc +++ b/src/libcode/vx_data2d_factory/data2d_factory.cc @@ -104,10 +104,7 @@ MetPythonDataFile * p = 0; case FileType_Python_Numpy: case FileType_Python_Xarray: - mlog << Error << "\nMet2dDataFileFactory::new_met_2d_data_file() -> " - << "Support for Python has not been compiled!\n" - << "To run Python scripts, recompile with the --enable-python option.\n\n"; - exit(1); + python_compile_error("Met2dDataFileFactory::new_met_2d_data_file()"); #endif diff --git a/src/libcode/vx_data2d_factory/var_info_factory.cc b/src/libcode/vx_data2d_factory/var_info_factory.cc index c09d03265d..324f2a88f4 100644 --- a/src/libcode/vx_data2d_factory/var_info_factory.cc +++ b/src/libcode/vx_data2d_factory/var_info_factory.cc @@ -96,10 +96,7 @@ VarInfo * VarInfoFactory::new_var_info(GrdFileType type) p = 0; break; #else - mlog << Error << "\nVarInfoFactory::new_var_info() -> " - << "Support for Python has not been compiled!\n" - << "To run Python scripts, recompile with the --enable-python option.\n\n"; - exit(1); + python_compile_error("VarInfoFactory::new_var_info()"); #endif case FileType_NcCF: diff --git a/src/tools/core/ensemble_stat/ensemble_stat.cc b/src/tools/core/ensemble_stat/ensemble_stat.cc index 6123c005fe..9826b9225b 100644 --- a/src/tools/core/ensemble_stat/ensemble_stat.cc +++ b/src/tools/core/ensemble_stat/ensemble_stat.cc @@ -907,11 +907,14 @@ void process_point_obs(int i_nc) { bool use_python = false; MetNcPointObsIn nc_point_obs; MetPointData *met_point_obs = 0; -#ifdef WITH_PYTHON - MetPythonPointDataFile met_point_file; + + // Check for python format string python_command = point_obs_file_list[i_nc]; bool use_xarray = (0 == python_command.find(conf_val_python_xarray)); use_python = use_xarray || (0 == python_command.find(conf_val_python_numpy)); + +#ifdef WITH_PYTHON + MetPythonPointDataFile met_point_file; if (use_python) { int offset = python_command.find("="); if (offset == std::string::npos) { @@ -932,6 +935,8 @@ void process_point_obs(int i_nc) { use_var_id = met_point_file.is_using_var_id(); } else { +#else + if (use_python) python_compile_error(method_name); #endif if(!nc_point_obs.open(point_obs_file_list[i_nc].c_str())) { nc_point_obs.close(); diff --git a/src/tools/core/point_stat/point_stat.cc b/src/tools/core/point_stat/point_stat.cc index 94fd159536..257c06e86f 100644 --- a/src/tools/core/point_stat/point_stat.cc +++ b/src/tools/core/point_stat/point_stat.cc @@ -693,11 +693,14 @@ void process_obs_file(int i_nc) { bool use_python = false; MetNcPointObsIn nc_point_obs; MetPointData *met_point_obs = 0; -#ifdef WITH_PYTHON - MetPythonPointDataFile met_point_file; + + // Check for python format string python_command = obs_file[i_nc]; bool use_xarray = (0 == python_command.find(conf_val_python_xarray)); use_python = use_xarray || (0 == python_command.find(conf_val_python_numpy)); + +#ifdef WITH_PYTHON + MetPythonPointDataFile met_point_file; if (use_python) { int offset = python_command.find("="); if (offset == std::string::npos) { @@ -718,6 +721,8 @@ void process_obs_file(int i_nc) { use_var_id = met_point_file.is_using_var_id(); } else { +#else + if (use_python) python_compile_error(method_name); #endif if( !nc_point_obs.open(obs_file[i_nc].c_str()) ) { nc_point_obs.close(); diff --git a/src/tools/other/ascii2nc/ascii2nc.cc b/src/tools/other/ascii2nc/ascii2nc.cc index e55753ca23..f00e826c90 100644 --- a/src/tools/other/ascii2nc/ascii2nc.cc +++ b/src/tools/other/ascii2nc/ascii2nc.cc @@ -601,6 +601,9 @@ void set_format(const StringArray & a) { ascii_format = ASCIIFormat_Python; } #endif + else if("python" == a[0]) { + python_compile_error("set_format()"); + } else { mlog << Error << "\nset_format() -> " << "unsupported ASCII observation format \"" diff --git a/src/tools/other/plot_point_obs/plot_point_obs.cc b/src/tools/other/plot_point_obs/plot_point_obs.cc index 164d4304f7..6c60242ccb 100644 --- a/src/tools/other/plot_point_obs/plot_point_obs.cc +++ b/src/tools/other/plot_point_obs/plot_point_obs.cc @@ -154,11 +154,14 @@ void process_point_obs(const char *point_obs_filename) { bool use_python = false; MetNcPointObsIn nc_point_obs; MetPointData *met_point_obs = 0; -#ifdef WITH_PYTHON - MetPythonPointDataFile met_point_file; + + // Check for python format string python_command = point_obs_filename; bool use_xarray = (0 == python_command.find(conf_val_python_xarray)); use_python = use_xarray || (0 == python_command.find(conf_val_python_numpy)); + +#ifdef WITH_PYTHON + MetPythonPointDataFile met_point_file; if (use_python) { int offset = python_command.find("="); if (offset == std::string::npos) { @@ -178,6 +181,8 @@ void process_point_obs(const char *point_obs_filename) { met_point_obs = met_point_file.get_met_point_data(); } else +#else + if (use_python) python_compile_error(method_name); #endif { if(!nc_point_obs.open(point_obs_filename)) { diff --git a/src/tools/other/point2grid/point2grid.cc b/src/tools/other/point2grid/point2grid.cc index 496e3384e0..c389a0c78c 100644 --- a/src/tools/other/point2grid/point2grid.cc +++ b/src/tools/other/point2grid/point2grid.cc @@ -282,11 +282,13 @@ void process_command_line(int argc, char **argv) { RGInfo.name = cline[1]; OutputFilename = cline[2]; - // Check if the input file -#ifdef WITH_PYTHON + // Check for python format string python_command = InputFilename; bool use_xarray = (0 == python_command.find(conf_val_python_xarray)); bool use_python = use_xarray || (0 == python_command.find(conf_val_python_numpy)); + + // Check if the input file +#ifdef WITH_PYTHON if (use_python) { int offset = python_command.find("="); if (offset == std::string::npos) { @@ -296,6 +298,8 @@ void process_command_line(int argc, char **argv) { } } else +#else + if (use_python) python_compile_error(method_name); #endif if ( !file_exists(InputFilename.c_str()) ) { mlog << Error << "\n" << method_name From ca1e7485137435c64f6f37bac8a6123f93d23659 Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Wed, 2 Nov 2022 08:56:39 -0600 Subject: [PATCH 2/3] Unrelated to #2277, refined the documentation for the CRPS_EMP_FAIR statistic. --- docs/Users_Guide/appendixC.rst | 14 ++++++++++---- docs/Users_Guide/ensemble-stat.rst | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/Users_Guide/appendixC.rst b/docs/Users_Guide/appendixC.rst index f40e779ebe..aa25c53f1e 100644 --- a/docs/Users_Guide/appendixC.rst +++ b/docs/Users_Guide/appendixC.rst @@ -977,7 +977,7 @@ CRPS Called "CRPS", "CRPSCL", "CRPS_EMP", "CRPS_EMP_FAIR" and "CRPSCL_EMP" in ECNT output :numref:`table_ES_header_info_es_out_ECNT` -The continuous ranked probability score (CRPS) is the integral, over all possible thresholds, of the Brier scores (:ref:`Gneiting et al., 2004 `). In MET, the CRPS is calculated two ways: using a normal distribution fit to the ensemble forecasts (CRPS and CRPSCL), and using the empirical ensemble distribution (CRPS_EMP and CRPSCL_EMP). The empirical ensemble CRPS is also adjusted by subtracting 1/2(m) times the mean absolute difference of the ensemble members (where m is the ensemble size), this is saved as CRPS_EMP_FAIR. In some cases, use of other distributions would be better. +The continuous ranked probability score (CRPS) is the integral, over all possible thresholds, of the Brier scores (:ref:`Gneiting et al., 2004 `). In MET, the CRPS is calculated two ways: using a normal distribution fit to the ensemble forecasts (CRPS and CRPSCL), and using the empirical ensemble distribution (CRPS_EMP and CRPSCL_EMP). The empirical ensemble CRPS can be adjusted (bias corrected) by subtracting 1/(2*m) times the mean absolute difference of the ensemble members, where m is the ensemble size. This is reported as a separate statistic called CRPS_EMP_FAIR. In some cases, use of other distributions would be better. WARNING: The normal distribution is probably a good fit for temperature and pressure, and possibly a not horrible fit for winds. However, the normal approximation will not work on most precipitation forecasts and may fail for many other atmospheric variables. @@ -987,13 +987,19 @@ Closed form expressions for the CRPS are difficult to define when using data rat In this equation, the y represents the event threshold. The estimated mean and standard deviation of the ensemble forecasts ( :math:`\mu \text{ and } \sigma`) are used as the parameters of the normal distribution. The values of the normal distribution are represented by the probability density function (PDF) denoted by :math:`\Phi` and the cumulative distribution function (CDF), denoted in the above equation by :math:`\phi`. -The overall CRPS is calculated as the average of the individual measures. In equation form: :math:`\text{CRPS} = \text{average(crps) } = \frac{1}{N} \sum_i^N \text{crps}_i`. +The overall CRPS is calculated as the average of the individual measures. In equation form: + +.. math:: \text{CRPS} = \text{average(crps) } = \frac{1}{N} \sum_{i=1}^N \text{crps}_i The score can be interpreted as a continuous version of the mean absolute error (MAE). Thus, the score is negatively oriented, so smaller is better. Further, similar to MAE, bias will inflate the CRPS. Thus, bias should also be calculated and considered when judging forecast quality using CRPS. -To calculate CRPS_EMP_FAIR (bias adjusted, empirical ensemble CRPS) +To calculate crps_emp_fair (bias adjusted, empirical ensemble CRPS) for each individual observation with m ensemble members: + +.. math:: \text{crps_emp_fair}_i = \text{crps_emp}_i - \frac{1}{2*m} * \frac{1}{m*(m-1)} \sum_{i \ne j}|f_{i} - f_{j}| + +The overall CRPS_EMP_FAIR is calculated as the average of the individual measures. In equation form: -.. math:: \text{CRPS_EMP_FAIR} = \text{CRPS_EMP} - \frac{1}{2*n} * \frac{1}{n*(n-1)} \sum|f_{i} - f_{j}| +.. math:: \text{CRPS_EMP_FAIR} = \text{average(crps_emp_fair) } = \frac{1}{N} \sum_{i=1}^N \text{crps_emp_fair}_i CRPS Skill Score ---------------- diff --git a/docs/Users_Guide/ensemble-stat.rst b/docs/Users_Guide/ensemble-stat.rst index de7b167edc..1ea3658415 100644 --- a/docs/Users_Guide/ensemble-stat.rst +++ b/docs/Users_Guide/ensemble-stat.rst @@ -48,7 +48,7 @@ Often, the goal of ensemble forecasting is to reproduce the distribution of obse The relative position (RELP) is a count of the number of times each ensemble member is closest to the observation. For stochastic or randomly derived ensembles, this statistic is meaningless. For specified ensemble members, however, it can assist users in determining if any ensemble member is performing consistently better or worse than the others. -The ranked probability score (RPS) is included in the Ranked Probability Score (RPS) line type. It is the mean of the Brier scores computed from ensemble probabilities derived for each probability category threshold (prob_cat_thresh) specified in the configuration file. The continuous ranked probability score (CRPS) is the average the distance between the forecast (ensemble) cumulative distribution function and the observation cumulative distribution function. It is an analog of the Brier score, but for continuous forecast and observation fields. The CRPS statistic is computed using two methods: assuming a normal distribution defined by the ensemble mean and spread (:ref:`Gneiting et al., 2004 `) and using the empirical ensemble distribution (:ref:`Hersbach, 2000 `). The CRPS statistic using the empirical ensemble distribution can be adjusted (bias corrected) by subtracting 1/2(m) times the mean absolute difference of the ensemble members (where m is the ensemble size), this is saved as a separate stastics called CRPS_EMP_FAIR. The CRPS (and CRPS FAIR) statistic is included in the Ensemble Continuous Statistics (ECNT) line type, along with other statistics quantifying the ensemble spread and ensemble mean skill. +The ranked probability score (RPS) is included in the Ranked Probability Score (RPS) line type. It is the mean of the Brier scores computed from ensemble probabilities derived for each probability category threshold (prob_cat_thresh) specified in the configuration file. The continuous ranked probability score (CRPS) is the average the distance between the forecast (ensemble) cumulative distribution function and the observation cumulative distribution function. It is an analog of the Brier score, but for continuous forecast and observation fields. The CRPS statistic is computed using two methods: assuming a normal distribution defined by the ensemble mean and spread (:ref:`Gneiting et al., 2004 `) and using the empirical ensemble distribution (:ref:`Hersbach, 2000 `). The CRPS statistic using the empirical ensemble distribution can be adjusted (bias corrected) by subtracting 1/(2*m) times the mean absolute difference of the ensemble members, where m is the ensemble size. This is reported as a separate statistic called CRPS_EMP_FAIR. The empirical CRPS and its fair version are included in the Ensemble Continuous Statistics (ECNT) line type, along with other statistics quantifying the ensemble spread and ensemble mean skill. The Ensemble-Stat tool can derive ensemble relative frequencies and verify them as probability forecasts all in the same run. Note however that these simple ensemble relative frequencies are not actually calibrated probability forecasts. If probabilistic line types are requested (output_flag), this logic is applied to each pair of fields listed in the forecast (fcst) and observation (obs) dictionaries of the configuration file. Each probability category threshold (prob_cat_thresh) listed for the forecast field is applied to the input ensemble members to derive a relative frequency forecast. The probability category threshold (prob_cat_thresh) parsed from the corresponding observation entry is applied to the (gridded or point) observations to determine whether or not the event actually occurred. The paired ensemble relative freqencies and observation events are used to populate an Nx2 probabilistic contingency table. The dimension of that table is determined by the probability PCT threshold (prob_pct_thresh) configuration file option parsed from the forecast dictionary. All probabilistic output types requested are derived from the this Nx2 table and written to the ascii output files. Note that the FCST_VAR name header column is automatically reset as "PROB({FCST_VAR}{THRESH})" where {FCST_VAR} is the current field being evaluated and {THRESH} is the threshold that was applied. @@ -622,7 +622,7 @@ The format of the STAT and ASCII output of the Ensemble-Stat tool are described - The Continuous Ranked Probability Skill Score (empirical distribution) * - 41 - CRPS_EMP_FAIR - - The Continuous Ranked Probability Skill Score (empirical distribution) adjusted by subtracting 1/2(m) times the mean absolute difference of the ensemble members (m is the ensemble size) + - The Continuous Ranked Probability Score (empirical distribution) adjusted by the mean absolute difference of the ensemble members * - 42 - MAE - The Mean Absolute Error of the ensemble mean (unperturbed or supplied) From 05746d24aa4ea589d0955029fd0534a8c55f028c Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Wed, 2 Nov 2022 10:02:36 -0600 Subject: [PATCH 3/3] Per #2277, more consistent formatting of -> in the error messages. --- src/basic/vx_config/config_util.cc | 6 +++--- src/libcode/vx_data2d_factory/data2d_factory.cc | 2 +- src/libcode/vx_data2d_factory/var_info_factory.cc | 2 +- src/tools/other/ascii2nc/ascii2nc.cc | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/basic/vx_config/config_util.cc b/src/basic/vx_config/config_util.cc index b9b48a1931..7306429e29 100644 --- a/src/basic/vx_config/config_util.cc +++ b/src/basic/vx_config/config_util.cc @@ -3127,10 +3127,10 @@ NormalizeType parse_conf_normalize(Dictionary *dict) { /////////////////////////////////////////////////////////////////////////////// void python_compile_error(const char *caller) { - ConcatString cs; - if(caller) cs << caller << " -> "; - mlog << Error << "\n" << cs + const char *method_name = (0 != caller) ? caller : "python_compile_error() -> "; + + mlog << Error << "\n" << method_name << "Support for Python has not been compiled!\n" << "To run Python scripts, recompile with the --enable-python option.\n\n"; diff --git a/src/libcode/vx_data2d_factory/data2d_factory.cc b/src/libcode/vx_data2d_factory/data2d_factory.cc index 28d0d355ea..719a249b4f 100644 --- a/src/libcode/vx_data2d_factory/data2d_factory.cc +++ b/src/libcode/vx_data2d_factory/data2d_factory.cc @@ -104,7 +104,7 @@ MetPythonDataFile * p = 0; case FileType_Python_Numpy: case FileType_Python_Xarray: - python_compile_error("Met2dDataFileFactory::new_met_2d_data_file()"); + python_compile_error("Met2dDataFileFactory::new_met_2d_data_file() -> "); #endif diff --git a/src/libcode/vx_data2d_factory/var_info_factory.cc b/src/libcode/vx_data2d_factory/var_info_factory.cc index 324f2a88f4..bdcee5a618 100644 --- a/src/libcode/vx_data2d_factory/var_info_factory.cc +++ b/src/libcode/vx_data2d_factory/var_info_factory.cc @@ -96,7 +96,7 @@ VarInfo * VarInfoFactory::new_var_info(GrdFileType type) p = 0; break; #else - python_compile_error("VarInfoFactory::new_var_info()"); + python_compile_error("VarInfoFactory::new_var_info() -> "); #endif case FileType_NcCF: diff --git a/src/tools/other/ascii2nc/ascii2nc.cc b/src/tools/other/ascii2nc/ascii2nc.cc index f00e826c90..3ebd547f1a 100644 --- a/src/tools/other/ascii2nc/ascii2nc.cc +++ b/src/tools/other/ascii2nc/ascii2nc.cc @@ -602,7 +602,7 @@ void set_format(const StringArray & a) { } #endif else if("python" == a[0]) { - python_compile_error("set_format()"); + python_compile_error("set_format() -> "); } else { mlog << Error << "\nset_format() -> "