From 08624a9c6e2a04e69f190e3f2ece030d079bbebf Mon Sep 17 00:00:00 2001
From: Markus Semmler
Date: Mon, 12 Feb 2024 17:26:47 +0100
Subject: [PATCH] Update plotting, integrate global accuracy, and minimize
 layout size.

---
 params.yaml                                   |  50 +++--
 .../calculate_threshold_characteristics.py    |  22 +--
 scripts/render_plots.py                       | 182 ++++++++++++------
 scripts/run_pipeline.py                       |   2 +-
 src/re_classwise_shapley/io.py                |  44 ++---
 src/re_classwise_shapley/plotting.py          |  72 +++----
 src/re_classwise_shapley/utils.py             |  44 +----
 src/re_classwise_shapley/valuation_methods.py |   6 +-
 8 files changed, 226 insertions(+), 196 deletions(-)

diff --git a/params.yaml b/params.yaml
index 4c56f728..4475cc64 100644
--- a/params.yaml
+++ b/params.yaml
@@ -1,10 +1,10 @@
 settings:
-  n_jobs: 4
+  n_jobs: 95
   backend: joblib
   mlflow_tracking_uri: http://localhost:5000
 
 threshold_characteristics:
-  active: false
+  active: true
   valuation_method: banzhaf_shapley # Method used to calculate the threshold characteristics.
   model: logistic_regression # Default model to use for determining the values
   max_plotting_percentage: 1e-4 # Threshold for stopping plotting in direction of x-axis.
@@ -34,8 +34,6 @@ active:
   - tmc_shapley
   - beta_shapley
   - loo
-  - banzhaf_shapley
-  - least_core
 repetitions:
   from: 1
   to: 20
@@ -48,31 +46,31 @@ experiments:
       fn: metric
       metric: accuracy
       eval_model: logistic_regression
-      plot:
+      plots:
         - accuracy
     weighted_accuracy_drop_knn:
       fn: metric
       metric: accuracy
       eval_model: knn
-      plot:
+      plots:
         - accuracy
     weighted_accuracy_drop_gradient_boosting_classifier:
       fn: metric
      metric: accuracy
       eval_model: gradient_boosting_classifier
-      plot:
+      plots:
         - accuracy
     weighted_accuracy_drop_svm:
       fn: metric
       metric: accuracy
       eval_model: svm
-      plot:
+      plots:
         - accuracy
     weighted_accuracy_drop_mlp:
       fn: metric
       metric: accuracy
       eval_model: mlp
-      plot:
+      plots:
         - accuracy
 
   metrics:
     geometric_weighted_drop:
       curve:
         - weighted_accuracy_drop_logistic_regression
         - weighted_accuracy_drop_knn
         - weighted_accuracy_drop_gradient_boosting_classifier
         - weighted_accuracy_drop_svm
         - weighted_accuracy_drop_mlp
       fn: geometric_weighted_drop
       input_perc: 1.0
-      plot:
-        - table
-        - boxplot
+      plots:
+        - table_wad
+        - box_wad
 
     geometric_weighted_drop_half:
       curve:
         - weighted_accuracy_drop_logistic_regression
         - weighted_accuracy_drop_knn
         - weighted_accuracy_drop_gradient_boosting_classifier
         - weighted_accuracy_drop_svm
         - weighted_accuracy_drop_mlp
       fn: geometric_weighted_drop
       input_perc: 0.5
-      plot:
-        - table
-        - boxplot
+      plots:
+        - table_wad
+        - box_wad
 
   noise_removal:
     sampler: default
@@ -118,14 +116,14 @@
       curve:
         - precision_recall
       fn: roc_auc
-      plot:
-        - table
-        - boxplot
+      plots:
+        - table_auc
+        - box_auc
 
 plots:
   accuracy:
     type: line
-    len_curve_perc: 0.5
+    plot_perc: 0.5
     x_label: "n"
     y_label: "Accuracy"
 
@@ -134,6 +132,20 @@
   precision_recall:
     type: line
     x_label: "Recall"
     y_label: "Precision"
 
+  table_wad:
+    type: table
+
+  table_auc:
+    type: table
+
+  box_wad:
+    type: boxplot
+    x_label: "WAD"
+
+  box_auc:
+    type: boxplot
+    x_label: "AUC"
+
 samplers:
   default:

diff --git a/scripts/calculate_threshold_characteristics.py b/scripts/calculate_threshold_characteristics.py
index 2a2f7e5f..9aad6c08 100644
--- a/scripts/calculate_threshold_characteristics.py
+++ b/scripts/calculate_threshold_characteristics.py
@@ -94,7 +94,7 @@ def _calculate_threshold_characteristics(
     n_jobs = params["settings"]["n_jobs"]
 
     logger.info("Calculating in class characteristics.")
-    in_cls_mar_acc, in_cls_stats = calculate_subset_score(
+    in_cls_mar_acc = calculate_subset_score(
         val_set,
         lambda c: np.argwhere(val_set.y_train == c)[:, 0],
         model_name,
@@ -104,10 +104,11 @@ def _calculate_threshold_characteristics(
         sampler_seed,
         n_jobs,
         backend,
     )
-    logger.info("Calculating out of class characteristics.")
-    out_of_cls_mar_acc, out_of_cls_stats = calculate_subset_score(
+
+    logger.info("Calculating global characteristics.")
+    global_mar_acc = calculate_subset_score(
         val_set,
-        lambda c: np.argwhere(val_set.y_train != c)[:, 0],
+        lambda c: np.arange(len(val_set.y_train)),  # all points: the global score
         model_name,
         model_seed,
@@ -116,20 +117,11 @@ def _calculate_threshold_characteristics(
         backend,
     )
 
-    logger.info("Calculating curves and statistics.")
-    threshold_characteristics_curves = calculate_threshold_characteristic_curves(
-        in_cls_mar_acc, out_of_cls_mar_acc
-    )
-    in_cls_out_of_cls_stats = pd.DataFrame(
-        [in_cls_stats, out_of_cls_stats], index=["in_cls", "out_of_cls"]
-    )
-
     logger.info("Storing files.")
     os.makedirs(output_dir, exist_ok=True)
-    in_cls_out_of_cls_stats.to_csv(output_dir / "threshold_characteristics_stats.csv")
-    threshold_characteristics_curves.to_csv(
-        output_dir / "threshold_characteristics_curves.csv", sep=";"
-    )
+
+    np.savetxt(output_dir / "in_cls_mar_acc.txt", in_cls_mar_acc)
+    np.savetxt(output_dir / "global_mar_acc.txt", global_mar_acc)
 
 
 if __name__ == "__main__":
diff --git a/scripts/render_plots.py b/scripts/render_plots.py
index a84fb9fa..a6dee4cf 100644
--- a/scripts/render_plots.py
+++ b/scripts/render_plots.py
@@ -73,21 +73,25 @@ def _render_plots(experiment_name: str, model_name: str):
     dataset_names = params_active["datasets"]
     method_names = params_active["valuation_methods"]
     repetitions = params_active["repetitions"]
+    repetitions = list(range(repetitions["from"], repetitions["to"] + 1))
+    curves_def = params["experiments"][experiment_name]["curves"]
+    curve_names = list(curves_def.keys())
     metrics_def = params["experiments"][experiment_name]["metrics"]
-    metrics = list(metrics_def.keys())
+    metric_names = list(metrics_def.keys())
 
     mlflow.set_tracking_uri(params["settings"]["mlflow_tracking_uri"])
     experiment_id = get_or_create_mlflow_experiment(mlflow_id)
 
     os.makedirs(output_folder, exist_ok=True)
-    logger.info("Starting run.")
+    logger.info(f"Starting experiment with id `{experiment_id}`.")
     with mlflow.start_run(
         experiment_id=experiment_id,
         run_name=datetime.now().isoformat(),
     ):
-        logger.info("Log parameters.")
+        logger.info("Flatten parameters & upload to mlflow.")
         mlflow.log_params(flatten_dict(params))
-        logger.info("Log datasets.")
+
+        logger.info("Record datasets in mlflow.")
         log_datasets(
             Accessor.datasets(
                 experiment_name,
@@ -96,25 +100,8 @@
         )
 
         plt.switch_backend("agg")
-        logger.info(f"Plot threshold characteristics.")
-        plot_threshold_characteristics_results = (
-            Accessor.threshold_characteristics_results(
-                experiment_name,
-                dataset_names,
-                repetitions,
-            )
-        )
-
-        with plot_threshold_characteristics(
-            plot_threshold_characteristics_results
-        ) as fig:
-            log_figure(
-                fig,
-                output_folder,
-                f"threshold_characteristics.svg",
-                "threshold_characteristics",
-            )
 
+        logger.info("Loading valuation results.")
         valuation_results = Accessor.valuation_results(
             experiment_name,
             model_name,
@@ -123,7 +110,7 @@
             method_names,
         )
         for method_name in method_names:
-            logger.info(f"Plot histogram for method {method_name} values.")
+            logger.info(f"Plot histogram for values of method `{method_name}`.")
             with plot_histogram(valuation_results, [method_name]) as fig:
                 log_figure(
                     fig, output_folder, f"density.{method_name}.svg", "densities"
@@ -133,52 +120,127 @@
         with plot_time(valuation_results) as fig:
             log_figure(fig, output_folder, "time.svg", "boxplots")
 
-        metrics_and_curves = Accessor.metrics_and_curves(
+        logger.info("Loading curves from hard disk.")
logger.info("Loading curves form hard disk.") + loaded_curves = Accessor.curves( experiment_name, model_name, dataset_names, method_names, + curve_names, repetitions, - metrics, ) - for metric_name in metrics: - metric_and_curves_for_metric = metrics_and_curves.loc[ - metrics_and_curves["metric_name"] == metric_name + for curve_name in curve_names: + logger.info(f"Processing curve '{curve_name}'.") + selected_loaded_curves = loaded_curves.loc[ + loaded_curves["curve_name"] == curve_name ].copy() + curve_def = curves_def[curve_name] + for plot_settings_name in curve_def["plots"]: + plot_settings = params["plots"][plot_settings_name] + logger.info( + f"Plotting {plot_settings['type']} plot with name '{plot_settings_name}'" + ) + match plot_settings["type"]: + case "line": + plot_perc = plot_settings.get("plot_perc", 1.0) + x_label = plot_settings.get("x_label", None) + y_label = plot_settings.get("y_label", None) + with plot_curves( + selected_loaded_curves, + plot_perc=plot_perc, + x_label=x_label, + y_label=y_label, + ) as fig: + log_figure( + fig, output_folder, f"{curve_name}.svg", "curves" + ) + case _: + raise NotImplementedError + + logger.info("Loading metrics form hard disk.") + loaded_metrics = Accessor.metrics( + experiment_name, + model_name, + dataset_names, + method_names, + metric_names, + repetitions, + curve_name, + ) + for metric_name in metric_names: + logger.info(f"Processing metric '{metric_name}'.") + selected_loaded_metrics = loaded_metrics.loc[ + loaded_metrics["metric_name"] == metric_name + ].copy() + metric_def = metrics_def[metric_name] + for plot_settings_name in metric_def["plots"]: + plot_settings = params["plots"][plot_settings_name] + logger.info( + f"Plotting {plot_settings['type']} plot with name '{plot_settings_name}'" + ) + match plot_settings["type"]: + case "table": + logger.info( + f"Converting df to table for metric '{metric_name}'." 
+                            )
+                            metric_table = linear_dataframe_to_table(
+                                selected_loaded_metrics,
+                                "dataset_name",
+                                "method_name",
+                                "metric",
+                                np.mean,
+                            )
+                            for dataset_name, row in metric_table.items():
+                                for method_name, v in row.items():
+                                    mlflow.log_metric(
+                                        f"{metric_name}.{dataset_name}.{method_name}", v
+                                    )
+
+                            logger.info(f"Plotting table for metric '{metric_name}'.")
+                            with plot_metric_table(metric_table) as fig:
+                                log_figure(
+                                    fig,
+                                    output_folder,
+                                    f"{metric_name}.{curve_name}.table.svg",
+                                    "tables",
+                                )
+                        case "boxplot":
+                            x_label = plot_settings.get("x_label", None)
+                            logger.info(f"Plotting boxplot for metric '{metric_name}'.")
+                            with plot_metric_boxplot(
+                                selected_loaded_metrics, x_label=x_label
+                            ) as fig:
+                                log_figure(
+                                    fig,
+                                    output_folder,
+                                    f"{metric_name}.{curve_name}.box.svg",
+                                    "boxplots",
+                                )
+
+                        case _:
+                            raise NotImplementedError
+
+        logger.info("Loading threshold characteristics.")
+        plot_threshold_characteristics_results = (
+            Accessor.threshold_characteristics_results(
+                experiment_name,
+                dataset_names,
+                repetitions,
+            )
+        )
 
-            len_curve_perc = metrics_def[metric_name].pop("len_curve_perc", None)
-            curve_label = metrics_def[metric_name].pop("curve_label", None)
-            y_label = metrics_def[metric_name].pop("y_label", None)
-            logger.info(f"Plotting curve for metric {metric_name}.")
-            with plot_curves(
-                metric_and_curves_for_metric,
-                len_curve_perc=len_curve_perc,
-                x_label=curve_label,
-                y_label=y_label,
-            ) as fig:
-                log_figure(fig, output_folder, f"{metric_name}.svg", "curves")
-
-            logger.info(f"Plotting table for metric {metric_name}.")
-            metric_table = linear_dataframe_to_table(
-                metric_and_curves_for_metric,
-                "dataset_name",
-                "method_name",
-                "metric",
-                np.mean,
+        logger.info("Plotting threshold characteristics.")
+        with plot_threshold_characteristics(
+            plot_threshold_characteristics_results
+        ) as fig:
+            log_figure(
+                fig,
+                output_folder,
+                "threshold_characteristics.svg",
+                "threshold_characteristics",
             )
-            for dataset_name, row in metric_table.items():
-                for method_name, v in row.items():
-                    mlflow.log_metric(f"{metric_name}.{dataset_name}.{method_name}", v)
-
-            with plot_metric_table(metric_table) as fig:
-                log_figure(fig, output_folder, f"{metric_name}.table.svg", "tables")
-
-            metric_label = metrics_def[metric_name].pop("metric_label", None)
-            logger.info(f"Plotting boxplot for metric {metric_name}.")
-            with plot_metric_boxplot(
-                metric_and_curves_for_metric, x_label=metric_label
-            ) as fig:
-                log_figure(fig, output_folder, f"{metric_name}.box.svg", "boxplots")
+
+        logger.info("Finished rendering plots and metrics.")
 
 
 if __name__ == "__main__":
diff --git a/scripts/run_pipeline.py b/scripts/run_pipeline.py
index e9433f76..8c3cdccf 100644
--- a/scripts/run_pipeline.py
+++ b/scripts/run_pipeline.py
@@ -168,7 +168,7 @@ def run_pipeline():
                     )
 
                     logger.info(f"Render plots for {experiment_name} and {model_name}.")
-                    # _render_plots(experiment_name, model_name)
+                    _render_plots(experiment_name, model_name)
 
     except KeyboardInterrupt:
         logger.info("Interrupted by Ctrl+C.")
diff --git a/src/re_classwise_shapley/io.py b/src/re_classwise_shapley/io.py
index ae4dd73a..054bb854 100644
--- a/src/re_classwise_shapley/io.py
+++ b/src/re_classwise_shapley/io.py
@@ -24,6 +24,8 @@
     "Accessor",
 ]
 
+from re_classwise_shapley.utils import calculate_threshold_characteristic_curves
+
 logger = setup_logger(__name__)
 
 
@@ -252,13 +254,15 @@ def threshold_characteristics_results(
         )
         if not os.path.exists(folder):
-            raise ValueError
+            raise ValueError(f"Folder '{folder}' does not exist.")
-        curves = pd.read_csv(folder / "threshold_characteristics_curves.csv", sep=";")
-        curves = curves.set_index(curves.columns[0])
-        stats = pd.read_csv(folder / "threshold_characteristics_stats.csv")
+
+        in_cls_mar_acc = np.loadtxt(folder / "in_cls_mar_acc.txt")
+        global_mar_acc = np.loadtxt(folder / "global_mar_acc.txt")
+        characteristics = calculate_threshold_characteristic_curves(
+            in_cls_mar_acc, global_mar_acc
+        )
         return {
             "dataset_name": dataset_name,
-            "curves": curves,
-            "stats": stats,
+            "characteristics": characteristics,
         }
 
     @staticmethod
     @walker_product_space()
-    def metrics_and_curves(
+    def metrics(
         experiment_name: str,
         model_name: str,
         dataset_name: str,
         method_name: str,
-        repetition_id: int,
         metric_name: str,
+        repetition_id: int,
+        curve_name: str,
     ) -> Dict:
-        """
-        Load metrics and curves from the results directory.
-
-        Args:
-            experiment_name: The name of the experiment.
-            model_name: The name of the model.
-            dataset_name: The name of the dataset.
-            method_name: The name of the method.
-            repetition_id: The repetition ID.
-            metric_name: The name of the metric.
-
-        Returns:
-            A dictionary containing the metrics and curves.
-        """
+        """
+        Load a single metric value from the results directory.
+
+        Args:
+            experiment_name: The name of the experiment.
+            model_name: The name of the model.
+            dataset_name: The name of the dataset.
+            method_name: The name of the method.
+            metric_name: The name of the metric.
+            repetition_id: The repetition ID.
+            curve_name: The name of the curve the metric was evaluated on.
+
+        Returns:
+            A dictionary containing the metric value and its identifying keys.
+        """
         base_path = (
-            Accessor.CURVES_PATH
+            Accessor.METRICS_PATH
             / experiment_name
             / model_name
             / dataset_name
             / str(repetition_id)
             / method_name
         )
-        metric = pd.read_csv(base_path / f"{metric_name}.csv")
-        metric = metric.iloc[-1, -1]
-        curve = pd.read_csv(base_path / f"{metric_name}.curve.csv")
-        curve.index = curve[curve.columns[0]]
-        curve = curve.drop(columns=[curve.columns[0]]).iloc[:, -1]
+        metric = pd.read_csv(base_path / f"{metric_name}.{curve_name}.csv")
+        metric = metric.iloc[-1, -1]
 
         return {
             "experiment_name": experiment_name,
@@ -399,7 +388,6 @@ def metrics_and_curves(
             "repetition_id": repetition_id,
             "metric_name": metric_name,
             "metric": metric,
-            "curve": curve,
         }
 
     @staticmethod
diff --git a/src/re_classwise_shapley/plotting.py b/src/re_classwise_shapley/plotting.py
index ba59dcc9..4513b5d9 100644
--- a/src/re_classwise_shapley/plotting.py
+++ b/src/re_classwise_shapley/plotting.py
@@ -113,7 +113,7 @@ def plot_grid_over_datasets(
     data: pd.DataFrame,
     plot_func: Callable,
     patch_size: Tuple[float, float] = (4, 4),
-    n_cols: int = 3,
+    n_cols: int = 5,
     legend: bool = False,
     format_x_ticks: str = None,
     tick_params_left_only: bool = False,
@@ -145,9 +145,10 @@
        A figure containing the plot.
""" dataset_names = data["dataset_name"].unique().tolist() - n_rows = int((len(dataset_names) + n_cols - 1) / n_cols) + n_plots = len(dataset_names) + n_rows = int((n_plots + n_cols - 1) / n_cols) fig, ax = plt.subplots( - n_rows, n_cols, figsize=(n_rows * patch_size[0], n_cols * patch_size[1]) + n_rows, n_cols, figsize=(n_cols * patch_size[0], n_rows * patch_size[1]) ) ax = ax.flatten() @@ -186,19 +187,24 @@ def plot_grid_over_datasets( plt.tight_layout() if legend: - legend_kwargs = {"framealpha": 0} handles, labels = ax[0].get_legend_handles_labels() - fig.legend( - handles, - labels, - loc="outside lower center", - ncol=5, - fontsize=12, - fancybox=False, - shadow=False, - **legend_kwargs, - ) - fig.subplots_adjust(bottom=0.1) + if n_plots % 2 == 0: + legend_kwargs = {"framealpha": 0} + fig.legend( + handles, + labels, + loc="outside lower center", + ncol=5, + fontsize=12, + fancybox=False, + shadow=False, + **legend_kwargs, + ) + fig.subplots_adjust(bottom=0.1) + else: + last = ax[-1] + last.set_axis_off() + last.legend(handles, labels, loc="center", prop={"size": 12}) yield fig plt.close(fig) @@ -208,8 +214,8 @@ def plot_grid_over_datasets( def plot_histogram( data: pd.DataFrame, method_names: OneOrMany[str], - patch_size: Tuple[float, float] = (4, 4), - n_cols: int = 3, + patch_size: Tuple[float, float] = (5, 5), + n_cols: int = 5, ) -> plt.Figure: """ Plot the histogram of the data values for each dataset and valuation method. @@ -274,8 +280,8 @@ def plot_histogram_func( @contextmanager def plot_time( data: pd.DataFrame, - patch_size: Tuple[float, float] = (4, 4), - n_cols: int = 3, + patch_size: Tuple[float, float] = (5, 5), + n_cols: int = 5, ) -> plt.Figure: """ Plot execution times as boxplot. @@ -318,9 +324,9 @@ def plot_time_func(data: pd.DataFrame, ax: plt.Axes, **kwargs): @contextmanager def plot_curves( data: pd.DataFrame, - patch_size: Tuple[float, float] = (4, 3), - n_cols: int = 3, - len_curve_perc: float = None, + patch_size: Tuple[float, float] = (6, 5), + n_cols: int = 5, + plot_perc: float = None, x_label: str = None, y_label: str = None, ) -> plt.Figure: @@ -331,7 +337,7 @@ def plot_curves( data: A pd.DataFrame with the curve data. patch_size: Size of one image patch of the multi plot. n_cols: Number of columns for subplot layout. - len_curve_perc: Percentage of the curve length to plot. + plot_perc: Percentage of the curve length to plot. 
""" def plot_curves_func(data: pd.DataFrame, ax: plt.Axes, **kwargs): @@ -341,10 +347,8 @@ def plot_curves_func(data: pd.DataFrame, ax: plt.Axes, **kwargs): mean_color, shade_color = COLORS[color_name] results = pd.concat(method_data["curve"].tolist(), axis=1) - if len_curve_perc is not None: - results = results.iloc[ - : int(m.ceil(len_curve_perc * results.shape[0])), : - ] + if plot_perc is not None: + results = results.iloc[: int(m.ceil(plot_perc * results.shape[0])), :] shaded_mean_normal_confidence_interval( results, @@ -388,8 +392,8 @@ def plot_metric_table( @contextmanager def plot_metric_boxplot( data: pd.DataFrame, - patch_size: Tuple[float, float] = (4, 4), - n_cols: int = 3, + patch_size: Tuple[float, float] = (5, 5), + n_cols: int = 5, x_label: str = None, ) -> plt.Figure: """ @@ -430,8 +434,8 @@ def plot_metric_boxplot_func(data: pd.DataFrame, ax: plt.Axes, **kwargs): @contextmanager def plot_threshold_characteristics( results: pd.DataFrame, - patch_size: Tuple[float, float] = (4, 4), - n_cols: int = 3, + patch_size: Tuple[float, float] = (5, 5), + n_cols: int = 5, ) -> plt.Figure: """ Plots threshold characteristics for various datasets. This function takes results @@ -445,11 +449,11 @@ def plot_threshold_characteristics( """ def plot_threshold_characteristics_func(data: pd.DataFrame, ax: plt.Axes, **kwargs): - cols = data.loc[data.index[0], "curves"].columns + cols = data.loc[data.index[0], "characteristics"].columns unfolded = {col: [] for col in cols} for i in data.index: for col in cols: - unfolded[col].append(data.loc[i, "curves"][col]) + unfolded[col].append(data.loc[i, "characteristics"][col]) colors = ["green", "red", "blue", "orange"] for (c_id, lst), color in zip(unfolded.items(), colors): @@ -463,7 +467,7 @@ def plot_threshold_characteristics_func(data: pd.DataFrame, ax: plt.Axes, **kwar n_cols=n_cols, legend=False, xlabel="Threshold", - ylabel="%", + ylabel="Fraction", grid=True, x_lims=[0.007, 0.000020, 0.01, 0.0055, 0.035, 0.0055, 0.003, 0.0055, 0.005], ) as fig: diff --git a/src/re_classwise_shapley/utils.py b/src/re_classwise_shapley/utils.py index ad14a323..f6d4a88b 100644 --- a/src/re_classwise_shapley/utils.py +++ b/src/re_classwise_shapley/utils.py @@ -133,7 +133,7 @@ def linear_dataframe_to_table( def calculate_threshold_characteristic_curves( in_cls_mar_acc: NDArray[np.float_], - out_of_cls_mar_acc: NDArray[np.float_], + global_mar_acc: NDArray[np.float_], n_thresholds: int = 100, ) -> pd.DataFrame: """ @@ -143,49 +143,25 @@ def calculate_threshold_characteristic_curves( Args: in_cls_mar_acc: In-class marginal accuracies. - out_of_cls_mar_acc: Out-of-class marginal accuracies. + global_mar_acc: Global marginal accuracies. n_thresholds: Number of thresholds to use for calculating the curve. Returns: A pd.DataFrame with all four characteristic curves. 
""" - max_x = np.max(np.maximum(np.abs(in_cls_mar_acc), np.abs(out_of_cls_mar_acc))) + max_x = np.max(np.maximum(np.abs(in_cls_mar_acc), np.abs(global_mar_acc))) x_axis = np.linspace(0, max_x, n_thresholds) - characteristics = pd.DataFrame(index=x_axis, columns=["<,<", "<,>", ">,<", ">,>"]) + characteristics = pd.DataFrame(index=x_axis, columns=["1:1", "1:2", "1:3", "1:4"]) n_data = len(in_cls_mar_acc) for i, threshold in enumerate(characteristics.index): - characteristics.iloc[i, 0] = ( - np.sum( - np.logical_and( - in_cls_mar_acc < -threshold, out_of_cls_mar_acc < -threshold + for k in range(4): + characteristics.iloc[i, k] = ( + np.sum( + in_cls_mar_acc < -threshold & global_mar_acc > (k + 1) * threshold ) + / n_data ) - / n_data - ) - characteristics.iloc[i, 1] = ( - np.sum( - np.logical_and( - in_cls_mar_acc < -threshold, out_of_cls_mar_acc > threshold - ) - ) - / n_data - ) - characteristics.iloc[i, 2] = ( - np.sum( - np.logical_and( - in_cls_mar_acc > threshold, out_of_cls_mar_acc < -threshold - ) - ) - / n_data - ) - characteristics.iloc[i, 3] = ( - np.sum( - np.logical_and( - in_cls_mar_acc > threshold, out_of_cls_mar_acc > threshold - ) - ) - / n_data - ) + return characteristics diff --git a/src/re_classwise_shapley/valuation_methods.py b/src/re_classwise_shapley/valuation_methods.py index 03f74d33..faf0c67d 100644 --- a/src/re_classwise_shapley/valuation_methods.py +++ b/src/re_classwise_shapley/valuation_methods.py @@ -249,8 +249,4 @@ def calculate_subset_score( subset_mar_acc = np.take_along_axis( subset_mar_acc, data_set.y_train.reshape([-1, 1]), axis=1 ).reshape(-1) - subset_stats = { - "mean": np.mean(subset_mar_acc), - "std": np.std(subset_mar_acc), - } - return subset_mar_acc, subset_stats + return subset_mar_acc