dtcenter · georgemccabe · Jul 21, 2021 · Jun 12, 2021 · Jun 12, 2021 · Jul 12, 2021
diff --git a/docs/Users_Guide/glossary.rst b/docs/Users_Guide/glossary.rst
@@ -7172,6 +7172,41 @@ METplus Configuration Glossary
 
      | *Used by:*  TCPairs
 
+   USER_SCRIPT_INPUT_DIR
+     Optional directory to look for input files. Prepended to each input
+     template (see :term:`USER_SCRIPT_INPUT_TEMPLATE`).
+
+     | *Used by:*  UserScript
+
+   USER_SCRIPT_INPUT_TEMPLATE
+     Optional list of input templates to use to look for input files.
+     If :term:`USER_SCRIPT_INPUT_DIR` is set, prepend that path to each item.
+     When the UserScript wrapper is run, the templates defined here will be
+     used to populate a list of all of the files that match the template for
+     each run time specified. Depending on the runtime frequency defined in
+     :term:`USER_SCRIPT_RUNTIME_FREQ`, text files will be generated that
+     contain a list of the file paths that correspond to the current run.
+     If any files are not found on disk, then "missing" will be added in place
+     of the file path. Each file list text file will be named after the current
+     init/valid/lead values for that run and a label named input<n> where <n>
+     is a zero-based index of the template, e.g. a single template will be
+     labelled input0, two templates will be labelled input0 and input1, etc.
+     Custom labels can be defined with
+     :term:`USER_SCRIPT_INPUT_TEMPLATE_LABELS`. For each template, an
+     environment variable named METPLUS_FILELIST_<label> will be set to the
+     path of the appropriate file list text file. This environment variable
+     can be referenced by the user-defined script to obtain the file list.
+
+     | *Used by:*  UserScript
+
+   USER_SCRIPT_INPUT_TEMPLATE_LABELS
+     Optional list of labels that correspond to each input template defined.
+     See :term:`USER_SCRIPT_INPUT_TEMPLATE`. Each template that does not have
+     a label defined will be assigned a label with the format input<n> where
+     <n> is the zero-based index of the template in the list.
+
+     | *Used by:*  UserScript
+
    TC_PAIRS_RUN_ONCE
      If True and LOOP_ORDER = processes, TCPairs will be run once using the
      INIT_BEG or VALID_BEG value (depending on the value of LOOP_BY).

diff --git a/docs/Users_Guide/wrappers.rst b/docs/Users_Guide/wrappers.rst
@@ -6466,6 +6466,13 @@ executable and any desired arguments. The variable support filename template
 substitution to send information like the current initialization or forecast
 lead time. See :ref:`Runtime_Freq` for more information on how the value of
 :term:`USER_SCRIPT_RUNTIME_FREQ` can control how the commands are called.
+Optionally, file paths can be defined with filename templates to generate
+a file list text file that contains all existing file paths that correspond
+to the appropriate runtime frequency for the current run time. The paths to
+the file list text files are set as environment variables that can be
+referenced inside the user-defined script to obtain a list of the files that
+should be processed.
+See :term:`USER_SCRIPT_INPUT_TEMPLATE` for more information.
 
 METplus Configuration
 ---------------------
@@ -6474,4 +6481,7 @@ METplus Configuration
 | :term:`USER_SCRIPT_COMMAND`
 | :term:`USER_SCRIPT_CUSTOM_LOOP_LIST`
 | :term:`USER_SCRIPT_SKIP_TIMES`
+| :term:`USER_SCRIPT_INPUT_DIR` (optional)
+| :term:`USER_SCRIPT_INPUT_TEMPLATE` (optional)
+| :term:`USER_SCRIPT_INPUT_TEMPLATE_LABELS` (optional)
 |
diff --git a/docs/use_cases/model_applications/s2s/UserScript_obsERA_obsOnly_WeatherRegime.py b/docs/use_cases/model_applications/s2s/UserScript_obsERA_obsOnly_WeatherRegime.py
@@ -13,15 +13,17 @@
 # --------------------
 #
 # To perform a weather regime analysis using 500 mb height data.  There are 2 pre-
-# processing steps, RegridDataPlane and PcpCombine, and 3 steps in the weather regime 
-# analysis, elbow, EOFs, and K means.  The elbow and K means steps begin with K means
-# clustering.  Elbow then computes the sum of squared distances for clusters 1 - 14 
-# and draws a straight line from the sum of squared distance for the clusters.  This 
-# helps determine the optimal cluster number by examining the largest difference between 
-# the curve and the straight line.  The EOFs step is optional.  It computes an empirical 
-# orthogonal function analysis.  The K means step uses clustering to compute the 
-# frequency of occurrernce and anomalies for each cluster to give the most common
-# weather regimes.
+# processing steps, RegridDataPlane and PcpCombine, and 4 steps in the weather regime 
+# analysis, elbow, EOFs, K means, and Time frequency.  The elbow and K means steps begin 
+# with K means clustering.  Elbow then computes the sum of squared distances for clusters 
+# 1 - 14 and draws a straight line from the sum of squared distance for the clusters.  
+# This helps determine the optimal cluster number by examining the largest difference 
+# between the curve and the straight line.  The EOFs step is optional.  It computes an 
+# empirical orthogonal function analysis.  The K means step uses clustering to compute 
+# the frequency of occurrence and anomalies for each cluster to give the most common
+# weather regimes.  Finally, the time frequency computes the frequency of each weather
+# regime over a user specified time frame.
+#
 
 ##############################################################################
 # Datasets
@@ -57,25 +59,29 @@
 # ------------------
 #
 # This use case runs the weather regime driver script which runs the steps the user
-# lists in STEPS_OBS.  The possible steps are regridding, time averaging, computing the 
-# elbow (ELBOW), plotting the elbow (PLOTELBOW), computing EOFs (EOF), plotting EOFs 
-# (PLOTEOF), computing K means (KMEANS), and plotting the K means (PLOTKMEANS).  Regridding 
-# and time averaging are set up in the UserScript .conf file and are formatted as follows:
-# PROCESS_LIST = RegridDataPlane(regrid_obs), PcpCombine(daily_mean_obs), UserScript(script_wr)
-#
-# The other steps are listed in the weather regime analsysis .conf file
+# lists in OBS_STEPS.  The possible steps are regridding, time averaging, creating a list of input
+# files for the weather regime calculation, computing the elbow (ELBOW), plotting the elbow 
+# (PLOTELBOW), computing EOFs (EOF), plotting EOFs (PLOTEOF), computing K means (KMEANS), plotting 
+# the K means (PLOTKMEANS), computing a time frequency of weather regimes (TIMEFREQ) and plotting 
+# the time frequency (PLOTFREQ).  Regridding and time averaging are set up in the UserScript .conf file 
+# and are formatted as follows:
+# PROCESS_LIST = RegridDataPlane(regrid_obs), PcpCombine(daily_mean_obs), UserScript(obs_wr_filelist), UserScript(script_wr)
+#
+# The other steps are listed in the [user_env_vars] section of the UserScript .conf file
 # in the following format:
-# OBS_STEPS = ELBOW+PLOTELBOW+EOF+PLOTEOF+KMEANS+PLOTKMEANS
+# OBS_STEPS = ELBOW+PLOTELBOW+EOF+PLOTEOF+KMEANS+PLOTKMEANS+TIMEFREQ+PLOTFREQ
+#
 
 ##############################################################################
 # METplus Workflow
 # ----------------
 #
 # The weather regime python code is run for each time for the forecast and observations 
 # data. This example loops by valid time.  This version is set to only process the weather 
-# regime steps (ELBOW, PLOTELBOW, EOF, PLOTEOF, KMEANS, PLOTKMEANS), omitting the REGRID 
-# and TIMEAVE pre-processing steps.  However, the configurations for pre-processing are
-# available for user reference.
+# regime steps (ELBOW, PLOTELBOW, EOF, PLOTEOF, KMEANS, PLOTKMEANS, TIMEFREQ, PLOTFREQ), omitting 
+# the regridding, time averaging, and file list creation pre-processing steps.  However, the 
+# configurations for pre-processing are available for user reference.
+#
 
 ##############################################################################
 # METplus Configuration
@@ -90,6 +96,7 @@
 #
 # .. highlight:: bash
 # .. literalinclude:: ../../../../parm/use_cases/model_applications/s2s/UserScript_obsERA_obsOnly_WeatherRegime.conf
+#
 
 ##############################################################################
 # MET Configuration
@@ -102,6 +109,7 @@
 #
 # parm/use_cases/met_tool_wrapper/RegridDataPlane/RegridDataPlane.py
 # parm/use_cases/met_tool_wrapper/PCPCombine/PCPCOmbine_derive.py
+#
 
 ##############################################################################
 # Python Scripts
@@ -162,10 +170,15 @@
 #
 # Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. Output for this use 
 # case will be found in model_applications/s2s/WeatherRegime (relative to **OUTPUT_BASE**) and will contain output 
-# for the steps requested.  This may include the regridded data, daily averaged files, and a weather regime output 
-# file.  In addition, output elbow, EOF, and Kmeans weather regime plots can be generated.  The location
-# of these output plots can be specified as WR_OUTPUT_DIR.  If it is not specified, plots will be sent 
-# to model_applications/s2s/WeatherRegime/plots (relative to **OUTPUT_BASE**).
+# for the steps requested.  This may include the regridded data, daily averaged files, a text file containing the 
+# list of input files, a weather regime output, and MET matched pair files for the weather regime classification and 
+# time frequency (if KMEANS and TIMEFREQ are run for both the forecast and observation data). In addition, output 
+# elbow, EOF, and Kmeans weather regime plots can be generated.  The location of these output plots can be specified 
+# as WR_OUTPUT_DIR.  If it is not specified, plots will be sent to {OUTPUT_BASE}/plots.  The output location for 
+# the matched pair files can be specified as WR_MPR_OUTPUT_DIR.  If it is not specified, it will be sent to 
+# {OUTPUT_BASE}/mpr.  The output weather regime text or netCDF file location is set in WR_OUTPUT_FILE_DIR.  If this 
+# is not specified, the output text/netCDF file will be sent to {OUTPUT_BASE}.
+#
 
 ##############################################################################
 # Keywords

diff --git a/internal_tests/pytests/grid_diag/test_grid_diag.py b/internal_tests/pytests/grid_diag/test_grid_diag.py
@@ -107,9 +107,9 @@ def test_get_all_files_and_subset(metplus_config, time_info, expected_subset):
     actual_files = [item for sub in actual_files for item in sub]
     assert(actual_files == expected_files)
 
-    file_list_files = wrapper.subset_input_files(time_info)
-    assert(file_list_files is not None)
-    with open(file_list_files[0], 'r') as file_handle:
+    file_list_dict = wrapper.subset_input_files(time_info)
+    assert file_list_dict
+    with open(file_list_dict['input0'], 'r') as file_handle:
         file_list = file_handle.readlines()
 
     file_list = file_list[1:]

diff --git a/internal_tests/pytests/minimum_pytest.seneca.sh b/internal_tests/pytests/minimum_pytest.seneca.sh
@@ -0,0 +1,4 @@
+export METPLUS_TEST_INPUT_BASE=/d1/projects/METplus/METplus_Data
+export METPLUS_TEST_OUTPUT_BASE=/d1/personal/${USER}/pytest
+export METPLUS_TEST_MET_INSTALL_DIR=/usr/local/met
+export METPLUS_TEST_TMP_DIR=${METPLUS_TEST_OUTPUT_BASE}/tmp
diff --git a/metplus/wrappers/command_builder.py b/metplus/wrappers/command_builder.py
@@ -863,13 +863,14 @@ def write_list_file(self, filename, file_list, output_dir=None):
         if not os.path.exists(list_dir):
             os.makedirs(list_dir, mode=0o0775)
 
-        self.logger.debug(f"Writing list of filenames to {list_path}")
+        self.logger.debug("Writing list of filenames...")
         with open(list_path, 'w') as file_handle:
             file_handle.write('file_list\n')
             for f_path in file_list:
                 self.logger.debug(f"Adding file to list: {f_path}")
                 file_handle.write(f_path + '\n')
 
+        self.logger.debug(f"Wrote list of filenames to {list_path}")
         return list_path
 
     def find_and_check_output_file(self, time_info=None,

diff --git a/metplus/wrappers/grid_diag_wrapper.py b/metplus/wrappers/grid_diag_wrapper.py
@@ -54,9 +54,7 @@ def create_c_dict(self):
             self.log_error('GRID_DIAG_CONFIG_FILE required to run.')
 
         c_dict['INPUT_DIR'] = self.config.getdir('GRID_DIAG_INPUT_DIR', '')
-        c_dict['INPUT_TEMPLATES'] = util.getlist(
-            self.config.getraw('filename_templates',
-                               'GRID_DIAG_INPUT_TEMPLATE'))
+        self.get_input_templates(c_dict)
 
         c_dict['OUTPUT_DIR'] = self.config.getdir('GRID_DIAG_OUTPUT_DIR', '')
         c_dict['OUTPUT_TEMPLATE'] = (
@@ -163,11 +161,11 @@ def run_at_time_custom(self, time_info):
         self.clear()
 
         # subset input files as appropriate
-        input_list_files = self.subset_input_files(time_info)
-        if not input_list_files:
+        input_list_dict = self.subset_input_files(time_info)
+        if not input_list_dict:
             return
 
-        for input_list_file in input_list_files:
+        for input_list_file in input_list_dict.values():
             self.infiles.append(input_list_file)
 
         # get output path
@@ -222,28 +220,6 @@ def set_data_field(self, time_info):
 
         return True
 
-    def find_input_files(self, time_info):
-        """! Loop over list of input templates and find files for each
-
-             @param time_info time dictionary to use for string substitution
-             @returns Dictionary of key input number and value is list of
-              input file list if all files were found, None if not.
-        """
-        all_input_files = {}
-        for idx, input_template in enumerate(self.c_dict['INPUT_TEMPLATES']):
-            self.c_dict['INPUT_TEMPLATE'] = input_template
-            input_files = self.find_data(time_info, return_list=True)
-            if not input_files:
-                continue
-
-            all_input_files[f'input{idx}'] = input_files
-
-        # return None if no matching input files were found
-        if not all_input_files:
-            return None
-
-        return all_input_files
-
     def set_command_line_arguments(self, time_info):
         """! add config file passing through do_string_sub to get custom
          string if set
@@ -274,66 +250,3 @@ def get_files_from_time(self, time_info):
             file_dict[key] = value
 
         return file_dict
-
-    def subset_input_files(self, time_info):
-        """! Obtain a subset of input files from the c_dict ALL_FILES based on
-             the time information for the current run.
-
-              @param time_info dictionary containing time information
-              @returns the path to a ascii file containing the list of files
-               or None if could not find any files
-        """
-        all_input_files = {}
-        for file_dict in self.c_dict['ALL_FILES']:
-            # compare time information for each input file
-            # add file to list of files to use if it matches
-            if not self.compare_time_info(time_info, file_dict['time_info']):
-                continue
-
-            input_keys = [key for key in file_dict if key.startswith('input')]
-            for input_key in input_keys:
-                if input_key not in all_input_files:
-                    all_input_files[input_key] = []
-                all_input_files[input_key].extend(file_dict[input_key])
-
-        # return None if no matching input files were found
-        if not all_input_files:
-            return None
-
-        # loop over all inputs and write a file list file for each
-        list_file_paths = []
-        for identifier, input_files in all_input_files.items():
-            list_file_name = self.get_list_file_name(time_info, identifier)
-            list_file_path = self.write_list_file(list_file_name, input_files)
-            list_file_paths.append(list_file_path)
-
-        return list_file_paths
-
-    @staticmethod
-    def get_list_file_name(time_info, identifier):
-        """! Build name of ascii file that contains a list of files to process.
-             If wildcard is set for init, valid, or lead then use the text ALL
-             in the filename.
-
-             @param time_info dictionary containing time information
-             @param identifier string to identify which input is used
-             @returns filename i.e.
-              grid_diag_files_{identifier}_init_{init}_valid_{valid}_lead_{lead}.txt
-        """
-        if time_info['init'] == '*':
-            init = 'ALL'
-        else:
-            init = time_info['init'].strftime('%Y%m%d%H%M%S')
-
-        if time_info['valid'] == '*':
-            valid = 'ALL'
-        else:
-            valid = time_info['valid'].strftime('%Y%m%d%H%M%S')
-
-        if time_info['lead'] == '*':
-            lead = 'ALL'
-        else:
-            lead = time_util.ti_get_seconds_from_lead(time_info['lead'],
-                                                      time_info['valid'])
-
-        return f"grid_diag_files_{identifier}_init_{init}_valid_{valid}_lead_{lead}.txt"