From 3323720150442d7446e0bb2adbe07ed2161a6207 Mon Sep 17 00:00:00 2001 From: Henrik Andersson Date: Tue, 2 Jan 2024 11:48:29 +0100 Subject: [PATCH 1/6] skill_table also for plotting.scatter --- modelskill/comparison/_collection_plotter.py | 12 ++-- modelskill/comparison/_comparer_plotter.py | 12 ++-- modelskill/plotting/_misc.py | 15 ++--- modelskill/plotting/_scatter.py | 63 +++++++++++++------- tests/plot/test_plot.py | 10 ++-- 5 files changed, 68 insertions(+), 44 deletions(-) diff --git a/modelskill/comparison/_collection_plotter.py b/modelskill/comparison/_collection_plotter.py index 6ab4a74be..ef0007971 100644 --- a/modelskill/comparison/_collection_plotter.py +++ b/modelskill/comparison/_collection_plotter.py @@ -198,7 +198,7 @@ def _scatter_one_model( title = title or f"{mod_name} vs {cc_sel_mod.name}" skill = None - units = None + skill_score_unit = None if skill_table: metrics = None if skill_table is True else skill_table @@ -212,15 +212,17 @@ def _scatter_one_model( skill = cc_sel_mod.mean_skill(metrics=metrics) # type: ignore # TODO improve this try: - units = unit_text.split("[")[1].split("]")[0] + skill_score_unit = unit_text.split("[")[1].split("]")[0] except IndexError: - units = "" # Dimensionless + skill_score_unit = "" # Dimensionless if self.is_directional: # hide quantiles and regression line quantiles = 0 reg_method = False + skill_scores = skill.iloc[0].to_dict() if skill is not None else None + ax = scatter( x=x, y=y, @@ -238,8 +240,8 @@ def _scatter_one_model( title=title, xlabel=xlabel, ylabel=ylabel, - skill_df=skill, - units=units, + skill_scores=skill_scores, + skill_score_unit=skill_score_unit, ax=ax, **kwargs, ) diff --git a/modelskill/comparison/_comparer_plotter.py b/modelskill/comparison/_comparer_plotter.py index b462dbef7..e3edcea84 100644 --- a/modelskill/comparison/_comparer_plotter.py +++ b/modelskill/comparison/_comparer_plotter.py @@ -604,21 +604,23 @@ def _scatter_one_model( title = title or f"{mod_name} vs {cmp.name}" skill = None - units = None + skill_score_unit = None if skill_table: metrics = None if skill_table is True else skill_table skill = cmp_sel_mod.skill(metrics=metrics) # type: ignore try: - units = unit_text.split("[")[1].split("]")[0] + skill_score_unit = unit_text.split("[")[1].split("]")[0] except IndexError: - units = "" # Dimensionless + skill_score_unit = "" # Dimensionless if self.is_directional: # hide quantiles and regression line quantiles = 0 reg_method = False + skill_scores = skill.iloc[0].to_dict() if skill is not None else None + ax = scatter( x=x, y=y, @@ -637,8 +639,8 @@ def _scatter_one_model( title=title, xlabel=xlabel, ylabel=ylabel, - skill_df=skill, - units=units, + skill_scores=skill_scores, + skill_score_unit=skill_score_unit, **kwargs, ) diff --git a/modelskill/plotting/_misc.py b/modelskill/plotting/_misc.py index aa6544caa..1bc6deea6 100644 --- a/modelskill/plotting/_misc.py +++ b/modelskill/plotting/_misc.py @@ -1,6 +1,6 @@ from __future__ import annotations import warnings -from typing import Optional, Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, Union, Mapping import matplotlib.pyplot as plt from matplotlib.axes import Axes @@ -149,15 +149,12 @@ def quantiles_xy( return np.quantile(x, q=q), np.quantile(y, q=q) -def format_skill_df(df: pd.DataFrame, units: str) -> pd.DataFrame: +def format_skill_table(skill_scores: Mapping[str, float], unit: str) -> pd.DataFrame: # select metrics columns accepted_columns = defined_metrics | {"n"} + kv = {k: v for k, v in skill_scores.items() if k in accepted_columns} - df = df.loc[:, df.columns.isin(accepted_columns)] - - kv = df.iloc[0].to_dict() - - lines = [_format_skill_line(key, value, units) for key, value in kv.items()] + lines = [_format_skill_line(key, value, unit) for key, value in kv.items()] df = pd.DataFrame(lines, columns=["name", "sep", "value"]) return df @@ -166,7 +163,7 @@ def format_skill_df(df: pd.DataFrame, units: str) -> pd.DataFrame: def _format_skill_line( name: str, value: float | int, - units: str, + unit: str, ) -> Tuple[str, str, str]: precision: int = 2 item_unit = " " @@ -175,7 +172,7 @@ def _format_skill_line( if name != "n": if metric_has_units(metric=name): # if statistic has dimensions, then add units - item_unit = unit_display_name(units) + item_unit = unit_display_name(unit) rounded_value = np.round(value, precision) fmt = f".{precision}f" diff --git a/modelskill/plotting/_scatter.py b/modelskill/plotting/_scatter.py index fff32474f..2419d2b3d 100644 --- a/modelskill/plotting/_scatter.py +++ b/modelskill/plotting/_scatter.py @@ -1,5 +1,6 @@ from __future__ import annotations -from typing import Optional, Sequence, Tuple, Callable, TYPE_CHECKING +from typing import Optional, Sequence, Tuple, Callable, TYPE_CHECKING, Mapping +import warnings if TYPE_CHECKING: import matplotlib.axes @@ -7,18 +8,18 @@ import matplotlib.colors as colors import matplotlib.pyplot as plt import numpy as np -import pandas as pd from matplotlib.cm import ScalarMappable from matplotlib import patches from matplotlib.axes import Axes from matplotlib.ticker import MaxNLocator from scipy import interpolate +import pandas as pd import modelskill.settings as settings from modelskill.settings import options from ..metrics import _linear_regression -from ._misc import quantiles_xy, sample_points, format_skill_df, _get_fig_ax +from ._misc import quantiles_xy, sample_points, format_skill_table, _get_fig_ax def scatter( @@ -40,8 +41,9 @@ def scatter( title: str = "", xlabel: str = "", ylabel: str = "", - skill_df: pd.DataFrame | None = None, - units: Optional[str] = "", + skill_table: bool = False, + skill_scores: Mapping[str, float] | None = None, + skill_score_unit: Optional[str] = "", ax: Optional[Axes] = None, **kwargs, ) -> Axes: @@ -100,10 +102,12 @@ def scatter( x-label text on plot, by default None ylabel : str, optional y-label text on plot, by default None - skill_df : dataframe, optional - dataframe with skill (stats) results to be added to plot, by default None - units : str, optional - user default units to override default units, eg 'metre', by default None + skill_table: bool, optional + add a table with skill scores to the plot, by default False + skill_scores : dict, optional + dictionary with skill (stats) results to be added to plot, by default None + skill_score_unit : str, optional + unit for skill_scores, by default None ax : matplotlib.axes.Axes, optional axes to plot on, by default None **kwargs @@ -113,6 +117,13 @@ def scatter( matplotlib.axes.Axes The axes on which the scatter plot was drawn. """ + if "skill_df" in kwargs: + warnings.warn( + "The `skill_df` keyword argument is deprecated. Use `skill_scores` instead.", + FutureWarning, + ) + skill_scores = kwargs.pop("skill_df").iloc[0].to_dict() + if show_hist is None and show_density is None: # Default: points density show_density = True @@ -170,6 +181,18 @@ def scatter( if backend not in PLOTTING_BACKENDS: raise ValueError(f"backend must be one of {list(PLOTTING_BACKENDS.keys())}") + if skill_table: + from modelskill import from_matched + + if skill_scores is not None: + raise ValueError( + "Cannot pass skill_scores and skill_table at the same time" + ) + df = pd.DataFrame({"obs": x, "model": y}) + cmp = from_matched(df) + skill = cmp.skill() + skill_scores = skill.iloc[0].to_dict() + return PLOTTING_BACKENDS[backend]( x=x, y=y, @@ -191,8 +214,8 @@ def scatter( xlim=xlim, ylim=ylim, title=title, - skill_df=skill_df, - units=units, + skill_scores=skill_scores, + skill_score_unit=skill_score_unit, fit_to_quantiles=fit_to_quantiles, ax=ax, **kwargs, @@ -221,8 +244,8 @@ def _scatter_matplotlib( xlim, ylim, title, - skill_df, - units, + skill_scores, + skill_score_unit, fit_to_quantiles, ax, **kwargs, @@ -325,10 +348,8 @@ def _scatter_matplotlib( ax.set_title(title) # Add skill table - if skill_df is not None: - df = skill_df.to_dataframe() - assert isinstance(df, pd.DataFrame) - _plot_summary_table(df, units, max_cbar=max_cbar) + if skill_scores is not None: + _plot_summary_table(skill_scores, skill_score_unit, max_cbar=max_cbar) return ax @@ -354,8 +375,8 @@ def _scatter_plotly( xlim, ylim, title, - skill_df, # TODO implement - units, # TODO implement + skill_scores, # TODO implement + skill_score_unit, # TODO implement fit_to_quantiles, # TODO implement **kwargs, ): @@ -516,9 +537,9 @@ def _plot_summary_border( def _plot_summary_table( - df: pd.DataFrame, units: str, max_cbar: Optional[float] = None + skill_scores: Mapping[str, float], units: str, max_cbar: Optional[float] = None ) -> None: - table = format_skill_df(df, units) + table = format_skill_table(skill_scores, units) cols = ["name", "sep", "value"] text_cols = ["\n".join(table[col]) for col in cols] diff --git a/tests/plot/test_plot.py b/tests/plot/test_plot.py index ce8924cda..2cd4ced78 100644 --- a/tests/plot/test_plot.py +++ b/tests/plot/test_plot.py @@ -3,7 +3,7 @@ import pandas as pd import pytest import modelskill as ms -from modelskill.plotting._misc import format_skill_df +from modelskill.plotting._misc import format_skill_table from modelskill.plotting._misc import sample_points @@ -69,7 +69,7 @@ def test_plot_spatial_overview(o1, o2, o3, mr1): plt.close() -def test_format_skill_df(): +def test_format_skill_table(): # # n bias rmse urmse mae cc si r2 # observation @@ -89,7 +89,9 @@ def test_format_skill_df(): index=["smhi_2095_klagshamn"], ) - df = format_skill_df(skill_df, units="degC") + skill_scores = skill_df.iloc[0].to_dict() + + df = format_skill_table(skill_scores, unit="degC") assert "N" in df.iloc[0, 0] assert "167" in df.iloc[0, 2] assert "BIAS" in df.iloc[1, 0] @@ -103,7 +105,7 @@ def test_format_skill_df(): assert "CC" in df.iloc[5, 0] assert "0.84" in df.iloc[5, 2] - df_short = format_skill_df(skill_df, units="meter") + df_short = format_skill_table(skill_scores, unit="meter") assert "N" in df_short.iloc[0, 0] assert "167" in df_short.iloc[0, 2] From 9aaa6c0c991b383eca5c74f49d1f604f4fa9599e Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Tue, 2 Jan 2024 12:35:01 +0000 Subject: [PATCH 2/6] allow list of metrics like other scatter functions; more eloborate docstring --- modelskill/plotting/_scatter.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/modelskill/plotting/_scatter.py b/modelskill/plotting/_scatter.py index 2419d2b3d..d48ad8e3f 100644 --- a/modelskill/plotting/_scatter.py +++ b/modelskill/plotting/_scatter.py @@ -103,9 +103,14 @@ def scatter( ylabel : str, optional y-label text on plot, by default None skill_table: bool, optional - add a table with skill scores to the plot, by default False - skill_scores : dict, optional - dictionary with skill (stats) results to be added to plot, by default None + calculate skill scores and show in box next to the plot, + True will show default metrics, list of metrics will show + these skill scores, by default False, + Note: cannot be used together with skill_scores argument + skill_scores : dict[str, float], optional + dictionary with skill scores to be shown in box next to + the plot, by default None + Note: cannot be used together with skill_table argument skill_score_unit : str, optional unit for skill_scores, by default None ax : matplotlib.axes.Axes, optional @@ -190,7 +195,8 @@ def scatter( ) df = pd.DataFrame({"obs": x, "model": y}) cmp = from_matched(df) - skill = cmp.skill() + metrics = None if skill_table is True else skill_table + skill = cmp.skill(metrics=metrics) skill_scores = skill.iloc[0].to_dict() return PLOTTING_BACKENDS[backend]( From 1ae7813c6206b7f02414e6c25f98490cdc789877 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Tue, 2 Jan 2024 12:41:30 +0000 Subject: [PATCH 3/6] correct type hint (add str and list(str)), and docstring changes --- modelskill/plotting/_scatter.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/modelskill/plotting/_scatter.py b/modelskill/plotting/_scatter.py index d48ad8e3f..86e54b071 100644 --- a/modelskill/plotting/_scatter.py +++ b/modelskill/plotting/_scatter.py @@ -41,7 +41,7 @@ def scatter( title: str = "", xlabel: str = "", ylabel: str = "", - skill_table: bool = False, + skill_table: Optional[str | Sequence[str] | bool], skill_scores: Mapping[str, float] | None = None, skill_score_unit: Optional[str] = "", ax: Optional[Axes] = None, @@ -65,9 +65,9 @@ def scatter( number of quantiles for QQ-plot, by default None and will depend on the scatter data length (10, 100 or 1000) if int, this is the number of points if sequence (list of floats), represents the desired quantiles (from 0 to 1) - fit_to_quantiles: bool, optional, by default False + fit_to_quantiles: bool, optional by default the regression line is fitted to all data, if True, it is fitted to the quantiles - which can be useful to represent the extremes of the distribution + which can be useful to represent the extremes of the distribution, by default False show_points : (bool, int, float), optional Should the scatter points be displayed? None means: show all points if fewer than 1e4, otherwise show 1e4 sample points, by default None. @@ -102,7 +102,7 @@ def scatter( x-label text on plot, by default None ylabel : str, optional y-label text on plot, by default None - skill_table: bool, optional + skill_table: str, List[str], bool, optional calculate skill scores and show in box next to the plot, True will show default metrics, list of metrics will show these skill scores, by default False, @@ -137,7 +137,6 @@ def scatter( raise ValueError("x & y are not of equal length") if norm is None: - # Default: PowerNorm with gamma of 0.5 norm = colors.PowerNorm(vmin=1, gamma=0.5) x_sample, y_sample = sample_points(x, y, show_points) @@ -383,7 +382,7 @@ def _scatter_plotly( title, skill_scores, # TODO implement skill_score_unit, # TODO implement - fit_to_quantiles, # TODO implement + fit_to_quantiles, **kwargs, ): import plotly.graph_objects as go From 09b7954d5d7a6956f80b7ead5627fbb7bf147241 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Tue, 2 Jan 2024 12:52:56 +0000 Subject: [PATCH 4/6] removed default by mistake --- modelskill/plotting/_scatter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelskill/plotting/_scatter.py b/modelskill/plotting/_scatter.py index 86e54b071..c5a494cf5 100644 --- a/modelskill/plotting/_scatter.py +++ b/modelskill/plotting/_scatter.py @@ -41,7 +41,7 @@ def scatter( title: str = "", xlabel: str = "", ylabel: str = "", - skill_table: Optional[str | Sequence[str] | bool], + skill_table: Optional[str | Sequence[str] | bool] = False, skill_scores: Mapping[str, float] | None = None, skill_score_unit: Optional[str] = "", ax: Optional[Axes] = None, From d150b2ba5b7c44efbc518b96fc652ab5cca4060e Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Tue, 2 Jan 2024 13:10:08 +0000 Subject: [PATCH 5/6] to_dict("records")[0] to preserve dtype --- modelskill/plotting/_scatter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelskill/plotting/_scatter.py b/modelskill/plotting/_scatter.py index c5a494cf5..fcd6dc025 100644 --- a/modelskill/plotting/_scatter.py +++ b/modelskill/plotting/_scatter.py @@ -127,7 +127,7 @@ def scatter( "The `skill_df` keyword argument is deprecated. Use `skill_scores` instead.", FutureWarning, ) - skill_scores = kwargs.pop("skill_df").iloc[0].to_dict() + skill_scores = kwargs.pop("skill_df").to_dict("records")[0] if show_hist is None and show_density is None: # Default: points density @@ -196,7 +196,7 @@ def scatter( cmp = from_matched(df) metrics = None if skill_table is True else skill_table skill = cmp.skill(metrics=metrics) - skill_scores = skill.iloc[0].to_dict() + skill_scores = skill.to_dict("records")[0] return PLOTTING_BACKENDS[backend]( x=x, From 13f1de90ac65c764c075bf35392d86efd5f57614 Mon Sep 17 00:00:00 2001 From: Jesper Sandvig Mariegaard <34088801+jsmariegaard@users.noreply.github.com> Date: Tue, 2 Jan 2024 13:15:56 +0000 Subject: [PATCH 6/6] add List and Iterable to list of NON_METRICS --- modelskill/metrics.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelskill/metrics.py b/modelskill/metrics.py index ae3d0d541..a053b8e0a 100644 --- a/modelskill/metrics.py +++ b/modelskill/metrics.py @@ -1114,6 +1114,8 @@ def metric_has_units(metric: Union[str, Callable]) -> bool: "Optional", "Set", "Tuple", + "List", + "Iterable", "Union", "_c_residual", "_linear_regression",