From 89da0d6d5b77eac39a03c2b784d9fb17febad70f Mon Sep 17 00:00:00 2001 From: justin-richling Date: Tue, 25 Apr 2023 16:13:20 -0600 Subject: [PATCH 1/3] Add check for incorrect climo years If user declares incorrect start and/or end climo years in config yaml file, check if in the datasets and force first and/or last years as default if not --- lib/adf_info.py | 45 +++++++++++++++++++++++++++++++++++---------- lib/adf_web.py | 1 - 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/lib/adf_info.py b/lib/adf_info.py index 3b39e0cd5..f755d3e41 100644 --- a/lib/adf_info.py +++ b/lib/adf_info.py @@ -144,15 +144,16 @@ def __init__(self, config_file, debug=False): syear_baseline = self.get_baseline_info('start_year') eyear_baseline = self.get_baseline_info('end_year') + #Get climo years for verification or assignment if missing + baseline_hist_locs = self.get_baseline_info('cam_hist_loc', + required=True) + starting_location = Path(baseline_hist_locs) + files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) + base_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) + #Check if start or end year is missing. If so then just assume it is the #start or end of the entire available model data. 
if syear_baseline is None or eyear_baseline is None: - baseline_hist_locs = self.get_baseline_info('cam_hist_loc', - required=True) - starting_location = Path(baseline_hist_locs) - files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) - base_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) - if syear_baseline is None: print(f"No given start year for {data_name}, using first found year...") syear_baseline = int(base_climo_yrs[0]) @@ -161,6 +162,16 @@ def __init__(self, config_file, debug=False): print(f"No given end year for {data_name}, using last found year...") eyear_baseline = int(base_climo_yrs[-1]) #End if + else: + if str(syear_baseline) not in base_climo_yrs: + print(f"Given start year '{syear_baseline}' is not in current dataset {data_name}, using first found year:",base_climo_yrs[0],"\n") + syear_baseline = int(base_climo_yrs[0]) + #End if + + if str(eyear_baseline) not in base_climo_yrs: + print(f"Given end year '{eyear_baseline}' is not in current dataset {data_name}, using last found year:",base_climo_yrs[-1],"\n") + eyear_baseline = int(base_climo_yrs[-1]) + #End if #End if data_name += f"_{syear_baseline}_{eyear_baseline}" @@ -168,6 +179,7 @@ def __init__(self, config_file, debug=False): self.__syear_baseline = syear_baseline self.__eyear_baseline = eyear_baseline + #Create plot location variable for potential use by the website generator. #Please note that this is also assumed to be the output location for the analyses scripts: #------------------------------------------------------------------------- @@ -199,11 +211,14 @@ def __init__(self, config_file, debug=False): for case_idx, case_name in enumerate(case_names): + #Get climo years for verification or assignment if missing + starting_location = Path(cam_hist_locs[case_idx]) + files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) + case_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) + + #Check if start or end year is missing. 
If so then just assume it is the + #start or end of the entire available model data. if syears[case_idx] is None or eyears[case_idx] is None: - print(f"No given climo years for {case_name}...") - starting_location = Path(cam_hist_locs[case_idx]) - files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) - case_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) if syears[case_idx] is None: print(f"No given start year for {case_name}, using first found year...") syears[case_idx] = int(case_climo_yrs[0]) @@ -212,6 +227,16 @@ def __init__(self, config_file, debug=False): print(f"No given end year for {case_name}, using last found year...") eyears[case_idx] = int(case_climo_yrs[-1]) #End if + else: + if str(syears[case_idx]) not in case_climo_yrs: + print(f"Given start year '{syears[case_idx]}' is not in current dataset {case_name}, using first found year:",case_climo_yrs[0],"\n") + syears[case_idx] = int(case_climo_yrs[0]) + #End if + + if str(eyears[case_idx]) not in case_climo_yrs: + print(f"Given end year '{eyears[case_idx]}' is not in current dataset {case_name}, using last found year:",case_climo_yrs[-1],"\n") + eyears[case_idx] = int(case_climo_yrs[-1]) + #End if #End if case_name += f"_{syears[case_idx]}_{eyears[case_idx]}" diff --git a/lib/adf_web.py b/lib/adf_web.py index d15c2a8b3..01216dd8f 100644 --- a/lib/adf_web.py +++ b/lib/adf_web.py @@ -361,7 +361,6 @@ def create_website(self): for case_idx, case_name in enumerate(case_names): if (syear_cases[case_idx] and eyear_cases[case_idx]) == None: - print(f"No given climo years for {case_name}...") starting_location = Path(cam_ts_locs[case_idx]) files_list = sorted(starting_location.glob('*nc')) #This assumes CAM file names stay with this convention From af544737144de152e8c24d7b73d15ee06b043453 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Thu, 4 May 2023 15:41:39 -0600 Subject: [PATCH 2/3] Remove requirement of CAM history file paths. 
--- lib/adf_config.py | 4 ++ lib/adf_info.py | 134 ++++++++++++++++++++++++++++------------------ run_adf_diag | 2 +- 3 files changed, 87 insertions(+), 53 deletions(-) diff --git a/lib/adf_config.py b/lib/adf_config.py index 57adc6686..3943f7336 100644 --- a/lib/adf_config.py +++ b/lib/adf_config.py @@ -272,6 +272,7 @@ def read_config_var(self, varname, conf_dict=None, required=False): emsg = "Supplied 'conf_dict' variable should be a dictionary," emsg += f" not type '{type(conf_dict)}'" raise TypeError(emsg) + #End if #Check that variable name exists in dictionary: if varname not in var_dict.keys(): @@ -280,9 +281,11 @@ def read_config_var(self, varname, conf_dict=None, required=False): emsg = f"Required variable '{varname}' not found in config file." emsg +=" Please see 'config_cam_baseline_example.yaml'." raise KeyError(emsg) + #End if #If not, then just return None: return None + #End if #Extract variable from dictionary: var = var_dict[varname] @@ -293,6 +296,7 @@ def read_config_var(self, varname, conf_dict=None, required=False): emsg = f"Required variable '{varname}' has not been set to a value." emsg += " Please see 'config_cam_baseline_example.yaml'." 
raise ValueError(emsg) + #End if #return a copy of the variable/list/dictionary, #this is done so that scripts can modify the copy diff --git a/lib/adf_info.py b/lib/adf_info.py index f755d3e41..fe9d83875 100644 --- a/lib/adf_info.py +++ b/lib/adf_info.py @@ -39,6 +39,7 @@ #ADF modules: from adf_config import AdfConfig +from adf_base import AdfError #+++++++++++++++++++ #Define Obs class @@ -145,38 +146,54 @@ def __init__(self, config_file, debug=False): eyear_baseline = self.get_baseline_info('end_year') #Get climo years for verification or assignment if missing - baseline_hist_locs = self.get_baseline_info('cam_hist_loc', - required=True) - starting_location = Path(baseline_hist_locs) - files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) - base_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) - - #Check if start or end year is missing. If so then just assume it is the - #start or end of the entire available model data. - if syear_baseline is None or eyear_baseline is None: - if syear_baseline is None: - print(f"No given start year for {data_name}, using first found year...") - syear_baseline = int(base_climo_yrs[0]) - #End if - if eyear_baseline is None: - print(f"No given end year for {data_name}, using last found year...") - eyear_baseline = int(base_climo_yrs[-1]) + baseline_hist_locs = self.get_baseline_info('cam_hist_loc') + + #Check if history file path exists: + if baseline_hist_locs: + + starting_location = Path(baseline_hist_locs) + files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) + base_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) + + #Check if start or end year is missing. If so then just assume it is the + #start or end of the entire available model data. 
+ if syear_baseline is None or eyear_baseline is None: + if syear_baseline is None: + print(f"No given start year for {data_name}, using first found year...") + syear_baseline = int(base_climo_yrs[0]) + #End if + if eyear_baseline is None: + print(f"No given end year for {data_name}, using last found year...") + eyear_baseline = int(base_climo_yrs[-1]) + #End if + else: + if str(syear_baseline) not in base_climo_yrs: + print(f"Given start year '{syear_baseline}' is not in current dataset {data_name}, using first found year:",base_climo_yrs[0],"\n") + syear_baseline = int(base_climo_yrs[0]) + #End if + + if str(eyear_baseline) not in base_climo_yrs: + print(f"Given end year '{eyear_baseline}' is not in current dataset {data_name}, using last found year:",base_climo_yrs[-1],"\n") + eyear_baseline = int(base_climo_yrs[-1]) + #End if #End if else: - if str(syear_baseline) not in base_climo_yrs: - print(f"Given start year '{syear_baseline}' is not in current dataset {data_name}, using first found year:",base_climo_yrs[0],"\n") - syear_baseline = int(base_climo_yrs[0]) - #End if - - if str(eyear_baseline) not in base_climo_yrs: - print(f"Given end year '{eyear_baseline}' is not in current dataset {data_name}, using last found year:",base_climo_yrs[-1],"\n") - eyear_baseline = int(base_climo_yrs[-1]) + #History file path isn't needed if user is running ADF directly on time series. + #So make sure start and end year are specified: + if syear_baseline is None or eyear_baseline is None: + emsg = "Missing starting year ('start_year') and final year ('end_year') " + emsg += "entries in the 'diag_cam_baseline_climo' config section.\n" + emsg += "These are required if the ADF is running " + emsg += "directly from time series files for the basline case." 
+ raise AdfError(emsg) #End if #End if - - data_name += f"_{syear_baseline}_{eyear_baseline}" #End if + #Update baseline case name: + data_name += f"_{syear_baseline}_{eyear_baseline}" + + #Save starting and ending years as object variables: self.__syear_baseline = syear_baseline self.__eyear_baseline = eyear_baseline @@ -205,37 +222,50 @@ def __init__(self, config_file, debug=False): eyears = [None]*len(case_names) #End if - #Loop over cases: - cam_hist_locs = self.get_cam_info('cam_hist_loc', - required=True) + #Extract cam history files location: + cam_hist_locs = self.get_cam_info('cam_hist_loc') + #Loop over cases: for case_idx, case_name in enumerate(case_names): - #Get climo years for verification or assignment if missing - starting_location = Path(cam_hist_locs[case_idx]) - files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) - case_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) - - #Check if start or end year is missing. If so then just assume it is the - #start or end of the entire available model data. - if syears[case_idx] is None or eyears[case_idx] is None: - if syears[case_idx] is None: - print(f"No given start year for {case_name}, using first found year...") - syears[case_idx] = int(case_climo_yrs[0]) - #End if - if eyears[case_idx] is None: - print(f"No given end year for {case_name}, using last found year...") - eyears[case_idx] = int(case_climo_yrs[-1]) + #Check if history file path exists: + if cam_hist_locs: + #Get climo years for verification or assignment if missing + starting_location = Path(cam_hist_locs[case_idx]) + files_list = sorted(starting_location.glob('*'+hist_str+'.*.nc')) + case_climo_yrs = sorted(np.unique([i.stem[-7:-3] for i in files_list])) + + #Check if start or end year is missing. If so then just assume it is the + #start or end of the entire available model data. 
+ if syears[case_idx] is None or eyears[case_idx] is None: + if syears[case_idx] is None: + print(f"No given start year for {case_name}, using first found year...") + syears[case_idx] = int(case_climo_yrs[0]) + #End if + if eyears[case_idx] is None: + print(f"No given end year for {case_name}, using last found year...") + eyears[case_idx] = int(case_climo_yrs[-1]) + #End if + else: + if str(syears[case_idx]) not in case_climo_yrs: + print(f"Given start year '{syears[case_idx]}' is not in current dataset {case_name}, using first found year:",case_climo_yrs[0],"\n") + syears[case_idx] = int(case_climo_yrs[0]) + #End if + + if str(eyears[case_idx]) not in case_climo_yrs: + print(f"Given end year '{eyears[case_idx]}' is not in current dataset {case_name}, using last found year:",case_climo_yrs[-1],"\n") + eyears[case_idx] = int(case_climo_yrs[-1]) + #End if #End if else: - if str(syears[case_idx]) not in case_climo_yrs: - print(f"Given start year '{syears[case_idx]}' is not in current dataset {case_name}, using first found year:",case_climo_yrs[0],"\n") - syears[case_idx] = int(case_climo_yrs[0]) - #End if - - if str(eyears[case_idx]) not in case_climo_yrs: - print(f"Given end year '{eyears[case_idx]}' is not in current dataset {case_name}, using last found year:",case_climo_yrs[-1],"\n") - eyears[case_idx] = int(case_climo_yrs[-1]) + #History file path isn't needed if user is running ADF directly on time series. + #So make sure start and end year are specified: + if syears is None or eyears is None: + emsg = "Missing starting year ('start_year') and final year ('end_year') " + emsg += "entries in the 'diag_cam_climo' config section.\n" + emsg += "These are required if the ADF is running " + emsg += "directly from time series files for the test case(s)." 
+ raise AdfError(emsg) #End if #End if diff --git a/run_adf_diag b/run_adf_diag index 687de3da0..8bd44140e 100755 --- a/run_adf_diag +++ b/run_adf_diag @@ -51,7 +51,7 @@ else: #Import ADF diagnostics object: from adf_diag import AdfDiag -#Import ADF diagnostics error classt: +#Import ADF diagnostics error class: from adf_base import AdfError ################# From a63858612f7d5f285e2ad673f744d80b8f495136 Mon Sep 17 00:00:00 2001 From: Jesse Nusbaumer Date: Wed, 10 May 2023 08:59:01 -0600 Subject: [PATCH 3/3] Allow ADF to run properly when given bad start or end years. --- lib/adf_diag.py | 8 +-- lib/adf_info.py | 71 +++++++++++-------------- scripts/averaging/create_climo_files.py | 19 +++---- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index c3a5eded9..67befb6da 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -803,10 +803,10 @@ def setup_run_cvdp(self): case_names = self.get_cam_info('cam_case_name', required=True) #Start years (not currently required): - syears = self.get_cam_info('start_year') + syears = self.climo_yrs['syears'] #End year (not currently rquired): - eyears = self.get_cam_info('end_year') + eyears = self.climo_yrs['eyears'] #Timeseries locations: cam_ts_loc = self.get_cam_info('cam_ts_loc') @@ -824,8 +824,8 @@ def setup_run_cvdp(self): #check to see if there is a CAM baseline case. If there is, read in relevant information. 
if not self.get_basic_info('compare_obs'): case_name_baseline = self.get_baseline_info('cam_case_name') - syears_baseline = self.get_baseline_info('start_year') - eyears_baseline = self.get_baseline_info('end_year') + syears_baseline = self.climo_yrs['syear_baseline'] + eyears_baseline = self.climo_yrs['eyear_baseline'] baseline_ts_loc = self.get_baseline_info('cam_ts_loc') #End if diff --git a/lib/adf_info.py b/lib/adf_info.py index fe9d83875..829e49210 100644 --- a/lib/adf_info.py +++ b/lib/adf_info.py @@ -157,25 +157,19 @@ def __init__(self, config_file, debug=False): #Check if start or end year is missing. If so then just assume it is the #start or end of the entire available model data. - if syear_baseline is None or eyear_baseline is None: - if syear_baseline is None: - print(f"No given start year for {data_name}, using first found year...") - syear_baseline = int(base_climo_yrs[0]) - #End if - if eyear_baseline is None: - print(f"No given end year for {data_name}, using last found year...") - eyear_baseline = int(base_climo_yrs[-1]) - #End if - else: - if str(syear_baseline) not in base_climo_yrs: - print(f"Given start year '{syear_baseline}' is not in current dataset {data_name}, using first found year:",base_climo_yrs[0],"\n") - syear_baseline = int(base_climo_yrs[0]) - #End if - - if str(eyear_baseline) not in base_climo_yrs: - print(f"Given end year '{eyear_baseline}' is not in current dataset {data_name}, using last found year:",base_climo_yrs[-1],"\n") - eyear_baseline = int(base_climo_yrs[-1]) - #End if + if syear_baseline is None: + print(f"No given start year for {data_name}, using first found year...") + syear_baseline = int(base_climo_yrs[0]) + elif str(syear_baseline) not in base_climo_yrs: + print(f"Given start year '{syear_baseline}' is not in current dataset {data_name}, using first found year:",base_climo_yrs[0],"\n") + syear_baseline = int(base_climo_yrs[0]) + #End if + if eyear_baseline is None: + print(f"No given end year for {data_name}, 
using last found year...") + eyear_baseline = int(base_climo_yrs[-1]) + elif str(eyear_baseline) not in base_climo_yrs: + print(f"Given end year '{eyear_baseline}' is not in current dataset {data_name}, using last found year:",base_climo_yrs[-1],"\n") + eyear_baseline = int(base_climo_yrs[-1]) #End if else: #History file path isn't needed if user is running ADF directly on time series. @@ -188,10 +182,10 @@ def __init__(self, config_file, debug=False): raise AdfError(emsg) #End if #End if - #End if - #Update baseline case name: - data_name += f"_{syear_baseline}_{eyear_baseline}" + #Update baseline case name: + data_name += f"_{syear_baseline}_{eyear_baseline}" + #End if (compare_obs) #Save starting and ending years as object variables: self.__syear_baseline = syear_baseline @@ -237,25 +231,19 @@ def __init__(self, config_file, debug=False): #Check if start or end year is missing. If so then just assume it is the #start or end of the entire available model data. - if syears[case_idx] is None or eyears[case_idx] is None: - if syears[case_idx] is None: - print(f"No given start year for {case_name}, using first found year...") - syears[case_idx] = int(case_climo_yrs[0]) - #End if - if eyears[case_idx] is None: - print(f"No given end year for {case_name}, using last found year...") - eyears[case_idx] = int(case_climo_yrs[-1]) - #End if - else: - if str(syears[case_idx]) not in case_climo_yrs: - print(f"Given start year '{syears[case_idx]}' is not in current dataset {case_name}, using first found year:",case_climo_yrs[0],"\n") - syears[case_idx] = int(case_climo_yrs[0]) - #End if - - if str(eyears[case_idx]) not in case_climo_yrs: - print(f"Given end year '{eyears[case_idx]}' is not in current dataset {case_name}, using last found year:",case_climo_yrs[-1],"\n") - eyears[case_idx] = int(case_climo_yrs[-1]) - #End if + if syears[case_idx] is None: + print(f"No given start year for {case_name}, using first found year...") + syears[case_idx] = int(case_climo_yrs[0]) + elif 
str(syears[case_idx]) not in case_climo_yrs: + print(f"Given start year '{syears[case_idx]}' is not in current dataset {case_name}, using first found year:",case_climo_yrs[0],"\n") + syears[case_idx] = int(case_climo_yrs[0]) + #End if + if eyears[case_idx] is None: + print(f"No given end year for {case_name}, using last found year...") + eyears[case_idx] = int(case_climo_yrs[-1]) + elif str(eyears[case_idx]) not in case_climo_yrs: + print(f"Given end year '{eyears[case_idx]}' is not in current dataset {case_name}, using last found year:",case_climo_yrs[-1],"\n") + eyears[case_idx] = int(case_climo_yrs[-1]) #End if else: #History file path isn't needed if user is running ADF directly on time series. @@ -269,6 +257,7 @@ def __init__(self, config_file, debug=False): #End if #End if + #Update case name with provided/found years: case_name += f"_{syears[case_idx]}_{eyears[case_idx]}" #Set the final directory name and save it to plot_location: diff --git a/scripts/averaging/create_climo_files.py b/scripts/averaging/create_climo_files.py index 1ab912087..776c3846e 100644 --- a/scripts/averaging/create_climo_files.py +++ b/scripts/averaging/create_climo_files.py @@ -65,8 +65,10 @@ def create_climo_files(adf, clobber=False, search=None): output_locs = adf.get_cam_info("cam_climo_loc", required=True) calc_climos = adf.get_cam_info("calc_cam_climo") overwrite = adf.get_cam_info("cam_overwrite_climo") - start_year = adf.get_cam_info("start_year") - end_year = adf.get_cam_info("end_year") + + #Extract simulation years: + start_year = adf.climo_yrs["syears"] + end_year = adf.climo_yrs["eyears"] #If variables weren't provided in config file, then make them a list #containing only None-type entries: @@ -74,10 +76,6 @@ def create_climo_files(adf, clobber=False, search=None): calc_climos = [None]*len(case_names) if not overwrite: overwrite = [None]*len(case_names) - if not start_year: - start_year = [None]*len(case_names) - if not end_year: - end_year = [None]*len(case_names) #End 
if #Check if a baseline simulation is also being used: @@ -88,8 +86,10 @@ def create_climo_files(adf, clobber=False, search=None): output_bl_loc = adf.get_baseline_info("cam_climo_loc", required=True) calc_bl_climos = adf.get_baseline_info("calc_cam_climo") ovr_bl = adf.get_baseline_info("cam_overwrite_climo") - bl_syr = adf.get_baseline_info("start_year") - bl_eyr = adf.get_baseline_info("end_year") + + #Extract baseline years: + bl_syr = adf.climo_yrs["syear_baseline"] + bl_eyr = adf.climo_yrs["eyear_baseline"] #Append to case lists: case_names.append(baseline_name) @@ -139,6 +139,7 @@ def create_climo_files(adf, clobber=False, search=None): if search is None: search = "{CASE}*.{VARIABLE}.*nc" # NOTE: maybe we should not care about the file extension part at all, but check file type later? + #Check model year bounds: syr, eyr = check_averaging_interval(start_year[case_idx], end_year[case_idx]) #Loop over CAM output variables: @@ -168,7 +169,7 @@ def create_climo_files(adf, clobber=False, search=None): list_of_arguments.append((ts_files, syr, eyr, output_file)) - + #End of var_list loop #--------------------