From dcae56e0a6825d8913905caf0b0d52b5d757d040 Mon Sep 17 00:00:00 2001 From: Damien Irving Date: Wed, 20 Sep 2023 17:36:46 +1000 Subject: [PATCH 1/4] Nicer plots --- unseen/moments.py | 25 +++++++++++++++-- unseen/stability.py | 68 +++++++++++++++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 19 deletions(-) diff --git a/unseen/moments.py b/unseen/moments.py index f775f57..21667bf 100644 --- a/unseen/moments.py +++ b/unseen/moments.py @@ -28,7 +28,7 @@ def calc_moments(sample_da, gev_estimates=[]): moments = {} moments["mean"] = float(np.mean(sample_da)) - moments["std"] = float(np.std(sample_da)) + moments["standard deviation"] = float(np.std(sample_da)) moments["skew"] = float(scipy.stats.skew(sample_da)) moments["kurtosis"] = float(scipy.stats.kurtosis(sample_da)) gev_shape, gev_loc, gev_scale = indices.fit_gev( @@ -68,6 +68,7 @@ def create_plot( da_obs, da_bc_fcst=None, outfile=None, + units=None, ensemble_dim="ensemble", init_dim="init_date", lead_dim="lead_time", @@ -85,6 +86,8 @@ def create_plot( Bias corrected forecast data for metric of interest outfile : str, optional Path for output image file + units : str, optional + units for plot axis labels ensemble_dim : str, default ensemble Name of ensemble member dimension init_dim : str, default init_date @@ -113,7 +116,7 @@ def create_plot( bc_bootstrap_values = {} bc_bootstrap_lower_ci = {} bc_bootstrap_upper_ci = {} - moments = ["mean", "std", "skew", "kurtosis", "GEV shape", "GEV loc", "GEV scale"] + moments = ["mean", "standard deviation", "skew", "kurtosis", "GEV shape", "GEV loc", "GEV scale"] for moment in moments: bootstrap_values[moment] = [] bootstrap_lower_ci[moment] = [] @@ -151,6 +154,16 @@ def create_plot( bc_bootstrap_upper_ci[moment] = bc_upper_ci letters = "abcdefg" + units_label = units if units else da_fcst.attrs["units"] + units = { + "mean": f"mean ({units_label})", + "standard deviation": f"standard deviation ({units_label})", + "skew": "skew", + "kurtosis": "kurtosis", + "GEV shape": "shape parameter", + "GEV scale": "scale parameter", + "GEV location": "location parameter" + } fig = plt.figure(figsize=[15, 20]) for plotnum, moment in enumerate(moments): ax = fig.add_subplot(4, 2, plotnum + 1) @@ -191,6 +204,7 @@ def create_plot( linewidth=3.0, ) ax.set_ylabel("count") + ax.set_xlabel(units[moment]) letter = letters[plotnum] ax.set_title(f"({letter}) {moment}") if letter == "a": @@ -255,6 +269,12 @@ def _parse_command_line(): default=None, help="Minimum lead time", ) + parser.add_argument( + "--units", + type=str, + default=None, + help="Units label for the plot axes", + ) args = parser.parse_args() return args @@ -296,6 +316,7 @@ def _main(): da_bc_fcst=da_bc_fcst, outfile=args.outfile, min_lead=args.min_lead, + units=args.units, ensemble_dim=args.ensemble_dim, init_dim=args.init_dim, lead_dim=args.lead_dim, diff --git a/unseen/stability.py b/unseen/stability.py index 66ec2b7..a390930 100644 --- a/unseen/stability.py +++ b/unseen/stability.py @@ -7,13 +7,21 @@ import seaborn as sns import matplotlib.pyplot as plt from scipy.stats import genextreme as gev +import matplotlib as mpl from . import fileio from . import indices from . import time_utils -def plot_dist_by_lead(ax, sample_da, metric, lead_dim="lead_time"): +mpl.rcParams['axes.titlesize'] = 'xx-large' +mpl.rcParams['xtick.labelsize'] = 'x-large' +mpl.rcParams['ytick.labelsize'] = 'x-large' +mpl.rcParams['legend.fontsize'] = 'large' +axis_label_size = 'large' + + +def plot_dist_by_lead(ax, sample_da, metric, units=None, lead_dim="lead_time"): """Plot distribution curves for each lead time. Parameters @@ -24,6 +32,8 @@ def plot_dist_by_lead(ax, sample_da, metric, lead_dim="lead_time"): Stacked forecast array with a sample dimension metric : str Metric name for plot title + units : str, optional + units for plot axis labels lead_dim: str, default 'lead_time' Name of the lead time dimension in sample_da """ @@ -41,14 +51,16 @@ def plot_dist_by_lead(ax, sample_da, metric, lead_dim="lead_time"): ax=ax, color=color, label=f"lead time: {lead} year ({n_values} samples)", + linewidth=2.0, ) ax.grid(True) ax.set_title(f"(a) {metric} distribution by lead time") - ax.set_xlabel(sample_da.attrs["units"]) + units_label = units if units else sample_da.attrs["units"] + ax.set_xlabel(units_label, fontsize=axis_label_size) ax.legend() -def plot_dist_by_time(ax, sample_da, metric, start_years): +def plot_dist_by_time(ax, sample_da, metric, start_years, units=None): """Plot distribution curves for each time slice (e.g. decade). Parameters @@ -61,6 +73,8 @@ def plot_dist_by_time(ax, sample_da, metric, start_years): Metric name for plot title start_years : list Equally spaced list of start years + units : str, optional + units for plot axis labels """ step = start_years[1] - start_years[0] - 1 @@ -79,10 +93,12 @@ def plot_dist_by_time(ax, sample_da, metric, start_years): ax=ax, color=color, label=f"{start_year}-{end_year} ({n_values} samples)", + linewidth=2.0, ) ax.grid(True) ax.set_title(f"(c) {metric} distribution by year") - ax.set_xlabel(sample_da.attrs["units"]) + units_label = units if units else sample_da.attrs["units"] + ax.set_xlabel(units_label, fontsize=axis_label_size) ax.legend() @@ -129,7 +145,7 @@ def plot_return(data, method, outfile=None): ax.plot(return_periods, return_values) ax.set_xscale("log") ax.set_xlabel("return period (years)") - ax.set_ylabel(data.attrs["units"]) + ax.set_ylabel(data.attrs["units"], ) ax.grid() if outfile: plt.savefig(outfile, bbox_inches="tight", facecolor="white", dpi=200) @@ -139,7 +155,7 @@ def plot_return(data, method, outfile=None): def plot_return_by_lead( - ax, sample_da, metric, method, uncertainty=False, ymax=None, lead_dim="lead_time" + ax, sample_da, metric, method, uncertainty=False, units=None, ymax=None, lead_dim="lead_time" ): """Plot return period curves for each lead time. @@ -155,6 +171,8 @@ def plot_return_by_lead( Method for producing return period curve uncertainty: bool, default False Plot 95% confidence interval + units : str, optional + units for plot axis labels ymax : float, optional ymax for return curve plot lead_dim: str, default 'lead_time' @@ -170,7 +188,7 @@ def plot_return_by_lead( n_values = len(selection_da) label = f"lead time {lead} ({n_values} samples)" color = next(colors) - ax.plot(return_periods, return_values, label=label, color=color) + ax.plot(return_periods, return_values, label=label, color=color, linewidth=2.0) if uncertainty: random_return_values = [] @@ -187,20 +205,21 @@ def plot_return_by_lead( lower_ci, label="95% confidence interval", color="0.5", - alpha=0.1, + alpha=0.3, ) ax.grid(True) ax.set_title(f"(b) {metric} return period by lead time") ax.set_xscale("log") - ax.set_xlabel("return period (years)") - ax.set_ylabel(sample_da.attrs["units"]) + ax.set_xlabel("return period (years)", fontsize=axis_label_size) + units_label = units if units else sample_da.attrs["units"] + ax.set_ylabel(units_label, fontsize=axis_label_size) ax.legend() ax.set_ylim((50, ymax)) def plot_return_by_time( - ax, sample_da, metric, start_years, method, uncertainty=False, ymax=None + ax, sample_da, metric, start_years, method, uncertainty=False, units=None, ymax=None ): """Plot return period curves for each time slice (e.g. decade). @@ -218,6 +237,8 @@ def plot_return_by_time( Method for producing return period curve uncertainty: bool, default False Plot 95% confidence interval + units : str, optional + units for plot axis labels ymax : float, optional ymax for return curve plot """ @@ -234,7 +255,7 @@ def plot_return_by_time( n_years = len(selection_da) label = f"{start_year}-{end_year} ({n_years} samples)" color = next(colors) - ax.plot(return_periods, return_values, label=label, color=color) + ax.plot(return_periods, return_values, label=label, color=color, linewidth=2.0) if uncertainty: random_return_values = [] @@ -251,14 +272,15 @@ def plot_return_by_time( lower_ci, label="95% confidence interval", color="0.5", - alpha=0.2, + alpha=0.3, ) ax.grid(True) ax.set_title(f"(d) {metric} return period by year") ax.set_xscale("log") - ax.set_xlabel("return period (years)") - ax.set_ylabel(sample_da.attrs["units"]) + ax.set_xlabel("return period (years)", fontsize=axis_label_size) + units_label = units if units else sample_da.attrs["units"] + ax.set_ylabel(units_label, fontsize=axis_label_size) ax.set_ylim((50, ymax)) ax.legend() @@ -270,6 +292,7 @@ def create_plot( outfile=None, uncertainty=False, ymax=None, + units=None, return_method="empirical", ensemble_dim="ensemble", init_dim="init_date", @@ -291,6 +314,8 @@ def create_plot( Plot the 95% confidence interval ymax : float, optional ymax for return curve plots + units : str, optional + units for plot axis labels return_method : {'empirical', 'gev'}, default empirial Method for fitting the return period curve ensemble_dim : str, default ensemble @@ -310,7 +335,7 @@ def create_plot( ax3 = fig.add_subplot(223) ax4 = fig.add_subplot(224) - plot_dist_by_lead(ax1, da_fcst_stacked, metric, lead_dim=lead_dim) + plot_dist_by_lead(ax1, da_fcst_stacked, metric, units=units, lead_dim=lead_dim) plot_return_by_lead( ax2, da_fcst_stacked, @@ -318,15 +343,17 @@ def create_plot( return_method, uncertainty=uncertainty, ymax=ymax, + units=units, lead_dim=lead_dim, ) - plot_dist_by_time(ax3, da_fcst_stacked, metric, start_years) + plot_dist_by_time(ax3, da_fcst_stacked, metric, start_years, units=units) plot_return_by_time( ax4, da_fcst_stacked, metric, start_years, return_method, + units=units, uncertainty=uncertainty, ymax=ymax, ) @@ -394,6 +421,12 @@ def _parse_command_line(): default="lead_time", help="Name of lead time dimension", ) + parser.add_argument( + "--units", + type=str, + default=None, + help="Units label for the plot axes", + ) args = parser.parse_args() return args @@ -415,6 +448,7 @@ def _main(): return_method=args.return_method, uncertainty=args.uncertainty, ymax=args.ymax, + units=args.units, ensemble_dim=args.ensemble_dim, init_dim=args.init_dim, lead_dim=args.lead_dim, From a14083a79227bb5416936ecc99bc3e36aa5fcd5c Mon Sep 17 00:00:00 2001 From: Damien Irving Date: Thu, 21 Sep 2023 16:38:20 +1000 Subject: [PATCH 2/4] Fix bug --- unseen/moments.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unseen/moments.py b/unseen/moments.py index 21667bf..a7d7c99 100644 --- a/unseen/moments.py +++ b/unseen/moments.py @@ -315,7 +315,6 @@ def _main(): da_obs, da_bc_fcst=da_bc_fcst, outfile=args.outfile, - min_lead=args.min_lead, units=args.units, ensemble_dim=args.ensemble_dim, init_dim=args.init_dim, From c5a36086972e1be6689e94086dd5fc9a637d4fc1 Mon Sep 17 00:00:00 2001 From: Damien Irving Date: Fri, 22 Sep 2023 14:32:21 +1000 Subject: [PATCH 3/4] Add axis labels --- unseen/moments.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unseen/moments.py b/unseen/moments.py index a7d7c99..98b49d2 100644 --- a/unseen/moments.py +++ b/unseen/moments.py @@ -35,7 +35,7 @@ def calc_moments(sample_da, gev_estimates=[]): sample_da, user_estimates=gev_estimates ) moments["GEV shape"] = gev_shape - moments["GEV loc"] = gev_loc + moments["GEV location"] = gev_loc moments["GEV scale"] = gev_scale return moments @@ -116,7 +116,7 @@ def create_plot( bc_bootstrap_values = {} bc_bootstrap_lower_ci = {} bc_bootstrap_upper_ci = {} - moments = ["mean", "standard deviation", "skew", "kurtosis", "GEV shape", "GEV loc", "GEV scale"] + moments = ["mean", "standard deviation", "skew", "kurtosis", "GEV shape", "GEV location", "GEV scale"] for moment in moments: bootstrap_values[moment] = [] bootstrap_lower_ci[moment] = [] @@ -130,7 +130,7 @@ def create_plot( random_sample = np.random.choice(da_fcst_stacked, sample_size) sample_moments = calc_moments( random_sample, - gev_estimates=[moments_fcst["GEV loc"], moments_fcst["GEV scale"]], + gev_estimates=[moments_fcst["GEV location"], moments_fcst["GEV scale"]], ) for moment in moments: bootstrap_values[moment].append(sample_moments[moment]) @@ -139,7 +139,7 @@ def create_plot( bc_random_sample = np.random.choice(da_bc_fcst_stacked, sample_size) bc_sample_moments = calc_moments( bc_random_sample, - gev_estimates=[moments_fcst["GEV loc"], moments_fcst["GEV scale"]], + gev_estimates=[moments_fcst["GEV location"], moments_fcst["GEV scale"]], ) for moment in moments: bc_bootstrap_values[moment].append(bc_sample_moments[moment]) @@ -164,7 +164,7 @@ def create_plot( "GEV scale": "scale parameter", "GEV location": "location parameter" } - fig = plt.figure(figsize=[15, 20]) + fig = plt.figure(figsize=[15, 22]) for plotnum, moment in enumerate(moments): ax = fig.add_subplot(4, 2, plotnum + 1) ax.hist( From 4f261032da9045070bdead7fd835c01b61088020 Mon Sep 17 00:00:00 2001 From: Damien Irving Date: Fri, 22 Sep 2023 14:40:42 +1000 Subject: [PATCH 4/4] Formatting fixes --- unseen/moments.py | 12 ++++++++++-- unseen/stability.py | 21 ++++++++++++++------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/unseen/moments.py b/unseen/moments.py index 98b49d2..2719fc3 100644 --- a/unseen/moments.py +++ b/unseen/moments.py @@ -116,7 +116,15 @@ def create_plot( bc_bootstrap_values = {} bc_bootstrap_lower_ci = {} bc_bootstrap_upper_ci = {} - moments = ["mean", "standard deviation", "skew", "kurtosis", "GEV shape", "GEV location", "GEV scale"] + moments = [ + "mean", + "standard deviation", + "skew", + "kurtosis", + "GEV shape", + "GEV location", + "GEV scale", + ] for moment in moments: bootstrap_values[moment] = [] bootstrap_lower_ci[moment] = [] @@ -162,7 +170,7 @@ def create_plot( "kurtosis": "kurtosis", "GEV shape": "shape parameter", "GEV scale": "scale parameter", - "GEV location": "location parameter" + "GEV location": "location parameter", } fig = plt.figure(figsize=[15, 22]) for plotnum, moment in enumerate(moments): diff --git a/unseen/stability.py b/unseen/stability.py index a390930..d8bc1f4 100644 --- a/unseen/stability.py +++ b/unseen/stability.py @@ -14,11 +14,11 @@ from . import time_utils -mpl.rcParams['axes.titlesize'] = 'xx-large' -mpl.rcParams['xtick.labelsize'] = 'x-large' -mpl.rcParams['ytick.labelsize'] = 'x-large' -mpl.rcParams['legend.fontsize'] = 'large' -axis_label_size = 'large' +mpl.rcParams["axes.titlesize"] = "xx-large" +mpl.rcParams["xtick.labelsize"] = "x-large" +mpl.rcParams["ytick.labelsize"] = "x-large" +mpl.rcParams["legend.fontsize"] = "large" +axis_label_size = "large" def plot_dist_by_lead(ax, sample_da, metric, units=None, lead_dim="lead_time"): @@ -145,7 +145,7 @@ def plot_return(data, method, outfile=None): ax.plot(return_periods, return_values) ax.set_xscale("log") ax.set_xlabel("return period (years)") - ax.set_ylabel(data.attrs["units"], ) + ax.set_ylabel(data.attrs["units"]) ax.grid() if outfile: plt.savefig(outfile, bbox_inches="tight", facecolor="white", dpi=200) @@ -155,7 +155,14 @@ def plot_return(data, method, outfile=None): def plot_return_by_lead( - ax, sample_da, metric, method, uncertainty=False, units=None, ymax=None, lead_dim="lead_time" + ax, + sample_da, + metric, + method, + uncertainty=False, + units=None, + ymax=None, + lead_dim="lead_time", ): """Plot return period curves for each lead time.