From 7d0826f5e88b1ac6b408a1e144c1e9520a2c07e6 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Mon, 23 Sep 2024 15:44:28 +0000 Subject: [PATCH 01/14] Report Handler --- config/audit.yaml | 11 +- config/dev_config/cifar10.yaml | 8 +- leakpro/attacks/mia_attacks/lira.py | 3 +- leakpro/metrics/attack_result.py | 54 ++++++++- leakpro/reporting/audit_report.py | 2 + leakpro/reporting/report_handler.py | 177 ++++++++++++++++++++++++++++ leakpro/reporting/utils.py | 2 +- leakpro_main.py | 22 +++- 8 files changed, 261 insertions(+), 18 deletions(-) create mode 100644 leakpro/reporting/report_handler.py diff --git a/config/audit.yaml b/config/audit.yaml index f9ea4a4d..e64658a6 100644 --- a/config/audit.yaml +++ b/config/audit.yaml @@ -16,11 +16,10 @@ audit: # Configurations for auditing # population: # attack_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack # lira: - # training_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack (in each shadow model training) - # num_shadow_models: 8 # Number of shadow models to train - # online: False # perform online or offline attack - # fixed_variance: True # Use a fixed variance for the whole audit - # boosting: True + # training_data_fraction: 0.5 # Fraction of the auxilary dataset to use for this attack (in each shadow model training) + # num_shadow_models: 4 # Number of shadow models to train + # online: True # perform online or offline attack + # boosting: False # loss_traj: # training_distill_data_fraction : 0.2 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2 # number_of_traj: 1 # Number of epochs (number of points in the loss trajectory) @@ -79,7 +78,7 @@ shadow_model: # Name of the class to instantiate from the specified file model_class: "ResNet18" #"ConvNet" batch_size: 256 - epochs: 1 + epochs: 3 optimizer: name: sgd #adam, sgd, rmsprop diff --git a/config/dev_config/cifar10.yaml b/config/dev_config/cifar10.yaml index 
efa586d2..d8c71e26 100644 --- a/config/dev_config/cifar10.yaml +++ b/config/dev_config/cifar10.yaml @@ -5,8 +5,8 @@ run: # Configurations for a specific run train: # Configuration for training type: pytorch # Training framework (we only support pytorch now). num_target_model: 1 #Integer number for indicating how many target models we want to audit for the privacy game - epochs: 10 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs. - batch_size: 128 # Integer number for indicating batch size for training the target model. For speedyresnet, it uses its own batch size. + epochs: 3 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs. + batch_size: 256 # Integer number for indicating batch size for training the target model. For speedyresnet, it uses its own batch size. optimizer: SGD # String which indicates the optimizer. We support Adam and SGD. For speedyresnet, it uses its own optimizer. learning_rate: 0.01 # Float number for indicating learning rate for training the target model. For speedyresnet, it uses its own learning_rate. momentum: 0.9 @@ -16,8 +16,8 @@ train: # Configuration for training data: # Configuration for data dataset: cifar10 # String indicates the name of the dataset - f_train: 0.1 # Float number from 0 to 1 indicating the fraction of the train dataset - f_test: 0.1 # Float number from 0 to 1 indicating the size of the test set + f_train: 0.4999 # Float number from 0 to 1 indicating the fraction of the train dataset + f_test: 0.4999 # Float number from 0 to 1 indicating the size of the test set data_dir: ./target/data # String about where to save the data. 
diff --git a/leakpro/attacks/mia_attacks/lira.py b/leakpro/attacks/mia_attacks/lira.py index 1204d3f1..0b61141d 100644 --- a/leakpro/attacks/mia_attacks/lira.py +++ b/leakpro/attacks/mia_attacks/lira.py @@ -290,7 +290,7 @@ def run_attack(self:Self) -> CombinedMetricResult: score[i] = (pr_in - pr_out) # Append the calculated probability density value to the score list # Generate thresholds based on the range of computed scores for decision boundaries - self.thresholds = np.linspace(np.min(score), np.max(score), 2000) + self.thresholds = np.linspace(np.min(score), np.max(score), 1000) # Split the score array into two parts based on membership: in (training) and out (non-training) self.in_member_signals = score[self.in_members].reshape(-1,1) # Scores for known training data members @@ -317,4 +317,5 @@ def run_attack(self:Self) -> CombinedMetricResult: true_labels=true_labels, predictions_proba=None, # Note: Direct probability predictions are not computed here signal_values=signal_values, + # masks = masks ) diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index c13f223a..d1d42088 100644 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -1,7 +1,7 @@ """Contains the AttackResult class, which stores the results of an attack.""" import os - +import json import numpy as np from sklearn.metrics import ( accuracy_score, @@ -100,11 +100,14 @@ def __init__( # noqa: PLR0913 threshold: Threshold computed by the metric. 
""" + # TODO REDIFINE THE CLASS SO IT DOSE NOT STORE MATRICIES BUT VECTORS + self.predicted_labels = predicted_labels self.true_labels = true_labels self.predictions_proba = predictions_proba self.signal_values = signal_values self.threshold = threshold + self.id = None self.accuracy = np.mean(predicted_labels == true_labels, axis=1) self.tn = np.sum(true_labels == 0) - np.sum( @@ -124,6 +127,39 @@ def __init__( # noqa: PLR0913 self.roc_auc = auc(self.fpr, self.tpr) + def _get_primitives(self:Self): + """Return the primitives of the CombinedMetricResult class.""" + return {"predicted_labels": self.predicted_labels.tolist(), + "true_labels": self.true_labels.tolist(), + "predictions_proba": self.predictions_proba.tolist() if isinstance(self.predictions_proba, np.ndarray) else None, + "signal_values": self.signal_values.tolist() if isinstance(self.signal_values, np.ndarray) else None, + "threshold": self.threshold.tolist() if isinstance(self.threshold, np.ndarray) else None, + } + + def save(self:Self, path: str, name: str, config:dict): + """Save the CombinedMetricResult class to disk.""" + + # Primitives are the set of variables to re-create the class from scratch + primitives = self._get_primitives() + + # Data to be saved + data = { + "resulttype": self.__class__.__name__, + "primitives": primitives, + "config": config + } + + # Get the name for the attack configuration + config_name = get_config_name(config["attack_list"][name]) + + # Check if path exists, otherwise create it. 
+ if not os.path.exists(f'{path}/{name}/{name}{config_name}'): + os.makedirs(f'{path}/{name}/{name}{config_name}') + + # Save the results to a file + with open(f'{path}/{name}/{name}{config_name}/data.json', 'w') as f: + json.dump(data, f) + def __str__(self:Self) -> str: """Return a string describing the metric result.""" txt_list = [] @@ -174,3 +210,19 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: save_image(gt_denormalized, os.path.join(save_path, "original_image.png")) return attack_name + +def get_config_name(config): + config = dict(sorted(config.items())) + + exclude = ["attack_data_dir"] + + config_name = "" + for key, value in zip(list(config.keys()), list(config.values())): + if key in exclude: + pass + else: + if type(value) is bool: + config_name += f"-{key}" + else: + config_name += f"-{key}={value}" + return config_name \ No newline at end of file diff --git a/leakpro/reporting/audit_report.py b/leakpro/reporting/audit_report.py index f9c7644c..09974877 100644 --- a/leakpro/reporting/audit_report.py +++ b/leakpro/reporting/audit_report.py @@ -224,6 +224,8 @@ def generate_report( verticalalignment="center", bbox={"facecolor": "white", "alpha": 0.5}, ) + plt.xlim(left=1e-5) + plt.ylim(bottom=1e-5) if save: plt.savefig(fname=filename, dpi=1000) if show: diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py new file mode 100644 index 00000000..1a0dfef0 --- /dev/null +++ b/leakpro/reporting/report_handler.py @@ -0,0 +1,177 @@ +import json +import logging +import numpy as np +import os +import subprocess + +# from leakpro.reporting.utils import get_config_name +from leakpro.metrics.attack_result import CombinedMetricResult + +import matplotlib.pyplot as plt + +def load_mia_results(path: str, name: str): + with open(f'{path}/{name}_data') as f: + result_data = json.load(f) + return result_data + +# Report Handler +class report_handler(): + """Implementation of the report handler.""" + + def __init__(self, 
report_dir: str, logger:logging.Logger) -> None: + self.logger = logger + self.report_dir = report_dir + self.image_paths = [] + + def save_results(self, attack_name: str, result_data: dict, config: dict) -> None: + """Save attack results. """ + + self.logger.info(f'Saving results for {attack_name}') + result_data.save(self.report_dir, attack_name, config) + + def load_results(self): + self.results = [] + for parentdir in os.scandir(f"{self.report_dir}"): + if parentdir.is_dir(): + for subdir in os.scandir(f"{self.report_dir}/{parentdir.name}"): + if subdir.is_dir(): + try: + with open(f"{self.report_dir}/{parentdir.name}/{subdir.name}/data.json") as f: + data = json.load(f) + + # Extract class name and data + resulttype = data["resulttype"] + primitives = data["primitives"] + config = data["config"] + + # Dynamically get the class from its name (resulttype) + # This assumes that the class is already defined in the current module or imported + if resulttype in globals() and callable(globals()[resulttype]): + cls = globals()[resulttype] + else: + raise ValueError(f"Class '{resulttype}' not found.") + + # Initialize the class using the saved primitives + instance = cls( + predicted_labels=np.array(primitives["predicted_labels"]), + true_labels=np.array(primitives["true_labels"]), + predictions_proba=np.array(primitives["predictions_proba"]) if primitives["predictions_proba"] is not None else None, + signal_values=np.array(primitives["signal_values"]) if primitives["signal_values"] is not None else None, + threshold=np.array(primitives["threshold"]) if primitives["threshold"] is not None else None, + ) + instance.config = config + instance.id = subdir.name + instance.resultname = parentdir.name + self.results.append(instance) + + except Exception as e: + self.logger.info(f"Not able to load data, Error: {e}") + + def _plot_merged_results( + self, + merged_results, + title = "ROC curve", + save_name = "", + ): + + filename = f"{self.report_dir}/{save_name}" + + for res 
in merged_results: + + fpr = res.fp / (res.fp + res.tn) + tpr = res.tp / (res.tp + res.fn) + + range01 = np.linspace(0, 1) + plt.fill_between(fpr, tpr, alpha=0.15) + plt.plot(fpr, tpr, label=res.id) + + plt.plot(range01, range01, "--", label="Random guess") + plt.yscale("log") + plt.xscale("log") + plt.tight_layout() + plt.grid() + plt.legend() + plt.xlabel("False positive rate (FPR)") + plt.ylabel("True positive rate (TPR)") + plt.title(title) + plt.xlim(left=1e-5) + plt.ylim(bottom=1e-5) + plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches='tight') + plt.clf() + return filename + + def _get_results_of_name(self, results, resultname_value) -> list: + indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] + return [results[idx] for idx in indices] + + def _get_all_attacknames(self): + attack_name_list = [] + for result in self.results: + if result.resultname not in attack_name_list: + attack_name_list.append(result.resultname) + return attack_name_list + + def create_results_all(self) -> None: + names = self._plot_merged_results(merged_results=self.results, save_name="all_results") + self.image_paths.append(names) + pass + + def get_strongest(self, results) -> list: + return max((res for res in results), key=lambda d: d.roc_auc) + + def create_results_strong(self): + attack_name_grouped_results = [self._get_results_of_name(self.results, name) for name in self._get_all_attacknames()] + strongest_results = [self.get_strongest(attack_name) for attack_name in attack_name_grouped_results] + names = self._plot_merged_results(merged_results=strongest_results, save_name="strongest_attacks") + self.image_paths.append(names) + pass + + def create_results_attackname_grouped(self): + all_attack_names = self._get_all_attacknames() + print(all_attack_names) + for name in all_attack_names: + attack_results = self._get_results_of_name(self.results, name) + names = self._plot_merged_results(merged_results=attack_results, 
save_name="all_"+name) + self.image_paths.append(names) + pass + + # TODO: Make other useful groupings of results + def create_results_numshadowmodels(self): + pass + + def create_report(self): + self._init_pdf() + + for image in self.image_paths: + self._append_to_pdf(image_path=image) + + self._compile_pdf() + pass + + def _init_pdf(self,): + self.latex_content = f""" + \\documentclass{{article}} + \\usepackage{{graphicx}} + + \\begin{{document}} + """ + pass + + def _append_to_pdf(self, image_path=None, table=None): + self.latex_content += f""" + \\begin{{figure}}[ht] + \\includegraphics[width=0.9\\textwidth]{{{image_path}.png}} + \\end{{figure}} + """ + pass + + def _compile_pdf(self): + self.latex_content += f""" + \\end{{document}} + """ + with open(f'{self.report_dir}/LeakPro_output.tex', 'w') as f: + f.write(self.latex_content) + + cmd = ['pdflatex', '-interaction', 'nonstopmode', f'{self.report_dir}/LeakPro_output.tex'] + proc = subprocess.Popen(cmd) + pass \ No newline at end of file diff --git a/leakpro/reporting/utils.py b/leakpro/reporting/utils.py index 9573d272..b078c000 100644 --- a/leakpro/reporting/utils.py +++ b/leakpro/reporting/utils.py @@ -42,4 +42,4 @@ def prepare_privacy_risk_report( metric_result=audit_results, save=True, filename=f"{save_path}/Histogram.png", - ) + ) \ No newline at end of file diff --git a/leakpro_main.py b/leakpro_main.py index 3023798a..753f6652 100644 --- a/leakpro_main.py +++ b/leakpro_main.py @@ -23,7 +23,7 @@ ) from leakpro.attacks.attack_scheduler import AttackScheduler from leakpro.dataset import get_dataloader -from leakpro.reporting.utils import prepare_privacy_risk_report +from leakpro.reporting.report_handler import report_handler from leakpro.utils.handler_logger import setup_log @@ -100,14 +100,26 @@ def generate_user_input(configs: dict, retrain: bool = False, logger: logging.Lo attack_scheduler = AttackScheduler(handler) audit_results = attack_scheduler.run_attacks() + # Initiate report handler + 
ReportHandler = report_handler(report_dir=report_dir, logger=logger) + for attack_name in audit_results: logger.info(f"Preparing results for attack: {attack_name}") - prepare_privacy_risk_report( - audit_results[attack_name]["result_object"], - configs["audit"], - save_path=f"{report_dir}/{attack_name}", + # Save results to be used later + ReportHandler.save_results( + attack_name=attack_name, + result_data=audit_results[attack_name]["result_object"], + config=configs["audit"] ) + + # Create report from the saved results + ReportHandler.load_results() + ReportHandler.create_results_all() + ReportHandler.create_results_strong() + ReportHandler.create_results_attackname_grouped() + ReportHandler.create_report() + # ------------------------------------------------ # Save the configs and user_configs config_log_path = configs["audit"]["config_log"] From 7e5dc2c67e2f833651158814c0be51b185736e83 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Tue, 22 Oct 2024 20:49:51 +0000 Subject: [PATCH 02/14] Adding report handler --- leakpro/attacks/mia_attacks/lira.py | 35 +- leakpro/metrics/attack_result.py | 506 +++++++++++++++++- leakpro/reporting/audit_report.py | 2 +- leakpro/reporting/report_handler.py | 206 +++---- tests/test_attack_result.py/__init__.py | 1 + .../test_attack_result.py | 149 ++++++ tests/test_report_handler/__init__.py | 1 + .../test_report_handler.py | 77 +++ 8 files changed, 862 insertions(+), 115 deletions(-) create mode 100644 tests/test_attack_result.py/__init__.py create mode 100644 tests/test_attack_result.py/test_attack_result.py create mode 100644 tests/test_report_handler/__init__.py create mode 100644 tests/test_report_handler/test_report_handler.py diff --git a/leakpro/attacks/mia_attacks/lira.py b/leakpro/attacks/mia_attacks/lira.py index 0b61141d..5a243863 100644 --- a/leakpro/attacks/mia_attacks/lira.py +++ b/leakpro/attacks/mia_attacks/lira.py @@ -7,8 +7,8 @@ from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA from 
leakpro.attacks.utils.boosting import Memorization from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler -from leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.import_helper import Self +from leakpro.metrics.attack_result import CombinedMetricResult, MIAResult from leakpro.signals.signal import ModelRescaledLogits from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger @@ -132,7 +132,7 @@ def prepare_attack(self:Self)->None: mask = (num_shadow_models_seen_points > 0) & (num_shadow_models_seen_points < self.num_shadow_models) # Filter the audit data - audit_data_indices = self.audit_dataset["data"][mask] + self.audit_dataset["data"] = self.audit_dataset["data"][mask] self.in_indices_masks = self.in_indices_masks[mask, :] # Filter IN and OUT members @@ -140,15 +140,16 @@ def prepare_attack(self:Self)->None: num_out_members = np.sum(mask[self.audit_dataset["out_members"]]) self.out_members = np.arange(len(self.in_members), len(self.in_members) + num_out_members) - assert len(audit_data_indices) == len(self.in_members) + len(self.out_members) + assert len(self.audit_dataset["data"]) == len(self.in_members) + len(self.out_members) - if len(audit_data_indices) == 0: + if len(self.audit_dataset["data"]) == 0: raise ValueError("No points in the audit dataset are used for the shadow models") else: - audit_data_indices = self.audit_dataset["data"] + self.audit_dataset["data"] = self.audit_dataset["data"] self.in_members = self.audit_dataset["in_members"] self.out_members = self.audit_dataset["out_members"] + # mask = [True if indice in self.in_members else False for indice in self.audit_dataset["data"]] # Check offline attack for possible IN- sample(s) if not self.online: @@ -157,14 +158,14 @@ def prepare_attack(self:Self)->None: logger.info(f"Some shadow model(s) contains {count_in_samples} IN samples in total for the 
model(s)") logger.info("This is not an offline attack!") - self.batch_size = len(audit_data_indices) - logger.info(f"Calculating the logits for all {self.num_shadow_models} shadow models") - self.shadow_models_logits = np.swapaxes(self.signal(self.shadow_models, self.handler, audit_data_indices,\ + self.batch_size = 20000 #int(len(self.audit_dataset["data"])/2) + self.logger.info(f"Calculating the logits for all {self.num_shadow_models} shadow models") + self.shadow_models_logits = np.swapaxes(self.signal(self.shadow_models, self.handler, self.audit_dataset["data"],\ self.batch_size), 0, 1) # Calculate logits for the target model - logger.info("Calculating the logits for the target model") - self.target_logits = np.swapaxes(self.signal([self.target_model], self.handler, audit_data_indices, self.batch_size),\ + self.logger.info("Calculating the logits for the target model") + self.target_logits = np.swapaxes(self.signal([self.target_model], self.handler, self.audit_dataset["data"], self.batch_size),\ 0, 1).squeeze() # Using Memorizationg boosting @@ -172,8 +173,8 @@ def prepare_attack(self:Self)->None: # Prepare for memorization org_audit_data_length = self.audit_dataset["data"].size - audit_data_indices = self.audit_dataset["data"][mask] if self.online else self.audit_dataset["data"] - audit_data_labels = self.handler.get_labels(audit_data_indices) + self.audit_dataset["data"] = self.audit_dataset["data"][mask] if self.online else self.audit_dataset["data"] + audit_data_labels = self.handler.get_labels(self.audit_dataset["data"]) logger.info("Running memorization") memorization = Memorization( @@ -184,7 +185,7 @@ def prepare_attack(self:Self)->None: self.in_indices_masks, self.shadow_models, self.target_model, - audit_data_indices, + self.audit_dataset["data"], audit_data_labels, org_audit_data_length, self.handler, @@ -312,10 +313,10 @@ def run_attack(self:Self) -> CombinedMetricResult: signal_values = np.concatenate([self.in_member_signals, 
self.out_member_signals]) # Return a result object containing predictions, true labels, and the signal values for further evaluation - return CombinedMetricResult( + return MIAResult( predicted_labels=predictions, true_labels=true_labels, - predictions_proba=None, # Note: Direct probability predictions are not computed here + predictions_proba=None, signal_values=signal_values, - # masks = masks + audit_indices=self.audit_dataset["data"], ) diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index d1d42088..8d20da2e 100644 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -1,8 +1,12 @@ """Contains the AttackResult class, which stores the results of an attack.""" +from collections import defaultdict import os import json import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sn from sklearn.metrics import ( accuracy_score, auc, @@ -175,19 +179,281 @@ def __str__(self:Self) -> str: txt_list.append("\n".join(txt)) return "\n\n".join(txt_list) +class MIAResult: + """Contains results related to the performance of the metric. It contains the results for multiple fpr.""" + + def __init__( # noqa: PLR0913 + self:Self, + predicted_labels: list=None, + true_labels: list=None, + predictions_proba:list=None, + signal_values:list=None, + threshold: list = None, + audit_indices: list = None, + metadata: dict = None, + resultname: str = None, + id: str = None, + load: bool = False, + )-> None: + """Compute and store the accuracy, ROC AUC score, and the confusion matrix for a metric. + + Args: + ---- + predicted_labels: Membership predictions of the metric. + true_labels: True membership labels used to evaluate the metric. + predictions_proba: Continuous version of the predicted_labels. + signal_values: Values of the signal used by the metric. + threshold: Threshold computed by the metric. 
+ + """ + + self.predicted_labels = predicted_labels + self.true_labels = true_labels + self.predictions_proba = predictions_proba + self.signal_values = signal_values + self.threshold = threshold + self.audit_indices = audit_indices + self.metadata = metadata + self.resultname = resultname + self.id = id + + if load: + return + + self.tn = np.sum(true_labels == 0) - np.sum( + predicted_labels[:, true_labels == 0], axis=1 + ) + self.tp = np.sum(predicted_labels[:, true_labels == 1], axis=1) + self.fp = np.sum(predicted_labels[:, true_labels == 0], axis=1) + self.fn = np.sum(true_labels == 1) - np.sum( + predicted_labels[:, true_labels == 1], axis=1 + ) + + self.fpr = self.fp / (self.fp + self.tn) + self.tpr = self.tp / (self.tp + self.fn) + self.roc_auc = auc(self.fpr, self.tpr) + + def load(self, data): + self.resultname = data["resultname"] + self.resulttype = data["resulttype"] + self.tpr = data["tpr"] + self.fpr = data["fpr"] + self.roc_auc = data["roc_auc"] + self.config = data["config"] + self.fixed_fpr_table = data["fixed_fpr"] + self.audit_indices = data["audit_indices"] + self.signal_values = data["signal_values"] + self.true_labels = data["true_labels"] + self.threshold = data["threshold"] + + def save(self:Self, path: str, name: str, config:dict = None): + """Save the MIAResults to disk.""" + + print(config) + + result_config = config["attack_list"][name] + fixed_fpr_table = get_result_fixed_fpr(self.fpr, self.tpr) + + # Get the name for the attack configuration + config_name = get_config_name(result_config) + + self.id = f"{name}{config_name}" + save_path = f"{path}/{name}/{self.id}" + + # Data to be saved + data = { + "resulttype": self.__class__.__name__, + "resultname": name, + "tpr": self.tpr.tolist(), + "fpr": self.fpr.tolist(), + "roc_auc": self.roc_auc, + "config": config, + "fixed_fpr": fixed_fpr_table, + "audit_indices": self.audit_indices.tolist(), + "signal_values": self.signal_values.tolist(), + "true_labels": self.true_labels.tolist(), + 
"threshold": self.threshold.tolist() if self.threshold is not None else None, + "id": name, + } + + # Check if path exists, otherwise create it. + if not os.path.exists(save_path): + os.makedirs(save_path) + + # Save the results to a file + with open(f'{save_path}/data.json', 'w') as f: + json.dump(data, f) + + # Create ROC plot for MIAResult + filename = f'{save_path}/ROC' + temp_res = MIAResult(load=True) + temp_res.tpr = self.tpr + temp_res.fpr = self.fpr + temp_res.id = self.id + self.create_plot(results = [temp_res], + filename = filename + ) + + # Create SignalHistogram plot for MIAResult + filename = f'{save_path}/SignalHistogram.png' + self.create_signal_histogram(filename = filename, + signal_values = self.signal_values, + true_labels = self.true_labels, + threshold = self.threshold + ) + + def get_strongest(self, results) -> list: + """Method for selecting the strongest attack.""" + return max((res for res in results), key=lambda d: d.roc_auc) + + def create_signal_histogram(self, filename, signal_values, true_labels, threshold) -> None: + + values = np.array(signal_values).ravel() + labels = np.array(true_labels).ravel() + threshold = threshold + + data = pd.DataFrame( + { + "Signal": values, + "Membership": ["Member" if y == 1 else "Non-member" for y in labels], + } + ) + + bin_edges = np.histogram_bin_edges(values, bins=1000) + + histogram = sn.histplot( + data=data, + x="Signal", + hue="Membership", + element="step", + kde=True, + bins = bin_edges + ) + + if threshold is not None and isinstance(threshold, float): + histogram.axvline(x=threshold, linestyle="--", color="C{}".format(2)) + histogram.text( + x=threshold - (np.max(values) - np.min(values)) / 30, + y=0.8, + s="Threshold", + rotation=90, + color="C{}".format(2), + transform=histogram.get_xaxis_transform(), + ) + + plt.grid() + plt.xlabel("Signal value") + plt.ylabel("Number of samples") + plt.title("Signal histogram") + plt.savefig(fname=filename, dpi=1000) + plt.clf() + + def 
create_plot(self, results, filename = "", save_name = "") -> None: + + # Create plot for results + reduced_labels = reduce_to_unique_labels(results) + for res, label in zip(results, reduced_labels): + + plt.fill_between(res.fpr, res.tpr, alpha=0.15) + plt.plot(res.fpr, res.tpr, label=label) + + # Plot random guesses + range01 = np.linspace(0, 1) + plt.plot(range01, range01, "--", label="Random guess") + + # Set plot parameters + plt.yscale("log") + plt.xscale("log") + plt.xlim(left=1e-5) + plt.ylim(bottom=1e-5) + plt.tight_layout() + plt.grid() + plt.legend(bbox_to_anchor =(0.5,-0.27), loc='lower center') + + plt.xlabel("False positive rate (FPR)") + plt.ylabel("True positive rate (TPR)") + plt.title(save_name+"ROC Curve") + plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches='tight') + plt.clf() + + def create_results( + self: Self, + results: list, + save_dir: str = "./", + save_name: str = "foo", + ): + + filename = f"{save_dir}/{save_name}" + + self.create_plot(results, filename, save_name) + + return self._latex(results, save_name, filename) + + def _latex(self, results, subsection, filename): + """Latex method for MIAResult.""" + + latex_content = "" + latex_content += f""" + \\subsection{{{" ".join(subsection.split("_"))}}} + \\begin{{figure}}[ht] + \\includegraphics[width=0.8\\textwidth]{{{filename}.png}} + \\end{{figure}} + """ + + latex_content += f''' + \\resizebox{{\\linewidth}}{{!}}{{% + \\begin{{tabularx}}{{\\textwidth}}{{l c l l l l}} + Attack name & attack config & TPR: 1.0\\%FPR & 0.1\\%FPR & 0.01\\%FPR & 0.0\\%FPR \\\\ + \\hline + ''' + + def config_latex_style(config): + config = " \\\\ ".join(config.split("-")[1:]) + config = "-".join(config.split("_")) + return f"""\\shortstack{{{config}}}""" + + for res in results: + config = config_latex_style(res.id) + latex_content += f'''{"-".join(res.resultname.split("_"))} & {config} & {res.fixed_fpr_table["TPR@1.0%FPR"]} & {res.fixed_fpr_table["TPR@0.1%FPR"]} & 
{res.fixed_fpr_table["TPR@0.01%FPR"]} & {res.fixed_fpr_table["TPR@0.0%FPR"]} \\\\ \\hline + ''' + latex_content += f""" + \\end{{tabularx}} + }} + \\newline + """ + return latex_content + + + class GIAResults: """Contains results for a GIA attack.""" def __init__(self: Self, original_data: DataLoader, recreated_data: DataLoader, - psnr_score: float, data_mean: float, data_std: float) -> None: + psnr_score: float, data_mean: float, data_std: float, load: bool) -> None: self.original_data = original_data self.recreated_data = recreated_data self.PSNR_score = psnr_score self.data_mean = data_mean self.data_std = data_std - def prepare_privacy_risk_report(self: Self, attack_name: str, save_path: str) -> None: - """Risk report for GIA. WIP.""" + if load: + return + + def load(self, data): + self.original = data["original"] + self.resulttype = data["resulttype"] + self.recreated = data["recreated"] + self.id = data["id"] + + def save(self: Self, save_path: str, name: str, config: dict): + """Save the GIAResults to disk.""" + + result_config = config["attack_list"][name] + + # Get the name for the attack configuration + config_name = get_config_name(result_config) + self.id = f"{name}{config_name}" + save_path = f"{save_path}/{name}/{self.id}" def extract_tensors_from_subset(dataset: Dataset) -> Tensor: all_tensors = [] @@ -204,12 +470,181 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: original_data = extract_tensors_from_subset(self.original_data.dataset) output_denormalized = clamp(recreated_data * self.data_std + self.data_mean, 0, 1) - save_image(output_denormalized, os.path.join(save_path, "recreated_image.png")) + recreated = os.path.join(save_path, "recreated_image.png") + save_image(output_denormalized, recreated) gt_denormalized = clamp(original_data * self.data_std + self.data_mean, 0, 1) - save_image(gt_denormalized, os.path.join(save_path, "original_image.png")) + original = os.path.join(save_path, "original_image.png") + 
save_image(gt_denormalized, original) + + # Data to be saved + data = { + "resulttype": self.__class__.__name__, + "original": original, + "recreated": recreated, + "id": self.id, + } + + # Check if path exists, otherwise create it. + if not os.path.exists(f'{save_path}'): + os.makedirs(f'{save_path}') - return attack_name + # Save the results to a file + with open(f'{save_path}/data.json', 'w') as f: + json.dump(data, f) + + pass + + def create_result(self: Self, attack_name: str, save_path: str) -> None: + """Result method for GIA.""" + + def _latex(attack_name, original, recreated): + latex_content = f""" + \\subsection{{{" ".join(attack_name.split("_"))}}} + \\begin{{figure}}[ht] + \\includegraphics[width=0.8\\textwidth]{{{original}}} + \\caption{{Original}} + \\end{{figure}} + + \\begin{{figure}}[ht] + \\includegraphics[width=0.8\\textwidth]{{{recreated}}} + \\caption{{Original}} + \\end{{figure}} + """ + return latex_content + + return _latex(attack_name=attack_name, original=save_path+"recreated_image.png", recreated=save_path+"original_image.png") + +class SyntheticResult: + """Contains results related to the performance of the metric. 
It contains the results for multiple fpr.""" + + def __init__( # noqa: PLR0913 + self:Self, + load: bool = False, + )-> None: + """Initalze Result method + + Args: + ---- + """ + # Initialize values to result object + # self.values = values + + # Have a method to return if the results are to be loaded + if load: + return + + # Create some result + # self.result_values = some_result + + def load(self, data: dict): + """Load the TEMPLATEResult class to disk.""" + # self.result_values = data["some_result"] + pass + + def save(self:Self, path: str, name: str, config:dict = None): + """Save the TEMPLATEResult class to disk.""" + + result_config = config["attack_list"][name] + + # Data to be saved + data = { + "some_result": self.result_values + } + + # Get the name for the attack configuration + config_name = get_config_name(result_config) + self.id = f"{name}{config_name}" + save_path = f'{path}/{name}/{self.id}' + + # Check if path exists, otherwise create it. + if not os.path.exists(f'{save_path}'): + os.makedirs(f'{save_path}') + + # Save the results to a file + with open(f'{save_path}/data.json', 'w') as f: + json.dump(data, f) + +class TEMPLATEResult: + """Contains results related to the performance of the metric. 
It contains the results for multiple fpr.""" + + def __init__( # noqa: PLR0913 + self:Self, + load: bool = False, + )-> None: + """Initalze Result method + + Args: + ---- + """ + # Initialize values to result object + # self.values = values + + # Have a method to return if the results are to be loaded + if load: + return + + # Create some result + # self.result_values = some_result + + def load(self, data: dict): + """Load the TEMPLATEResult class to disk.""" + # self.result_values = data["some_result"] + pass + + def save(self:Self, path: str, name: str, config:dict = None): + """Save the TEMPLATEResult class to disk.""" + + result_config = config["attack_list"][name] + + # Data to be saved + data = { + "some_result": self.result_values + } + + # Get the name for the attack configuration + config_name = get_config_name(result_config) + self.id = f"{name}{config_name}" + + # Check if path exists, otherwise create it. + if not os.path.exists(f'{path}/{name}/{self.id}'): + os.makedirs(f'{path}/{name}/{self.id}') + + # Save the results to a file + with open(f'{path}/{name}/{self.id}/data.json', 'w') as f: + json.dump(data, f) + + def create_result(self, results): + """Method for results.""" + def _latex(results): + """Latex method for TEMPLATEResult""" + pass + pass + + def create_result(self, results): + """Method for results.""" + def _latex(results): + """Latex method for TEMPLATEResult""" + pass + pass + +def get_result_fixed_fpr(fpr, tpr): + + # Function to find TPR at given FPR thresholds + def find_tpr_at_fpr(fpr_array:np.ndarray, tpr_array:np.ndarray, threshold:float): #-> Optional[str]: + try: + # Find the last index where FPR is less than the threshold + valid_index = np.where(fpr_array < threshold)[0][-1] + return float(f"{tpr_array[valid_index] * 100:.4f}") + except IndexError: + # Return None or some default value if no valid index found + return "N/A" + + # Compute TPR values at various FPR thresholds + return {"TPR@1.0%FPR": find_tpr_at_fpr(fpr, tpr, 
0.01), + "TPR@0.1%FPR": find_tpr_at_fpr(fpr, tpr, 0.001), + "TPR@0.01%FPR": find_tpr_at_fpr(fpr, tpr, 0.0001), + "TPR@0.0%FPR": find_tpr_at_fpr(fpr, tpr, 0.0)} def get_config_name(config): config = dict(sorted(config.items())) @@ -225,4 +660,61 @@ def get_config_name(config): config_name += f"-{key}" else: config_name += f"-{key}={value}" - return config_name \ No newline at end of file + return config_name + +def reduce_to_unique_labels(results): + """Reduce very long labels to unique and distinct ones.""" + strings = [res.id for res in results] + + # Dictionary to store name as key and a list of configurations as value + name_configs = defaultdict(list) + + # Parse each string and store configurations + for s in strings: + parts = s.split('-') + name = parts[0] # The first part is the name + config = '-'.join(parts[1:]) if len(parts) > 1 else '' # The rest is the configuration + name_configs[name].append(config) # Store the configuration under the name + + def find_common_suffix(configs): + """Helper function to find the common suffix among multiple configurations""" + if not configs: + return '' + + # Split each configuration by "-" and zip them in reverse to compare backwards + reversed_configs = [config.split('-')[::-1] for config in configs] + common_suffix = [] + + for elements in zip(*reversed_configs): + if all(e == elements[0] for e in elements): + common_suffix.append(elements[0]) + else: + break + + # Return the common suffix as a string, reversed back to normal order + return '-'.join(common_suffix[::-1]) + + result = [] + + # Process each name and its configurations + for name, configs in name_configs.items(): + if len(configs) > 1: + # Find the common suffix for the configurations + common_suffix = find_common_suffix(configs) + + # Remove the common suffix from each configuration + trimmed_configs = [config[:-(len(common_suffix) + 1)] if common_suffix and config.endswith(common_suffix) else config for config in configs] + + # Process configurations 
based on whether they share the same pattern + for config in trimmed_configs: + if config: + result.append(f"{name}-{config}") + else: + result.append(name) + else: + # If only one configuration, just return the string as is + result.append(f"{name}") + + return result + + diff --git a/leakpro/reporting/audit_report.py b/leakpro/reporting/audit_report.py index 09974877..0a3b9e82 100644 --- a/leakpro/reporting/audit_report.py +++ b/leakpro/reporting/audit_report.py @@ -648,7 +648,7 @@ def read_and_parse_data(filename:str) -> dict: return data # Main logic to process and save results -def fixed_fpr_results(fpr:np.ndarray, tpr:np.ndarray, configs:dict, filename:str) -> None: +def fixed_fpr_results(fpr:np.ndarray, tpr:np.ndarray, configs:dict, filename:str = None) -> None: """Compute and save fixed FPR results. Args: diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index 1a0dfef0..067c3202 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -4,24 +4,26 @@ import os import subprocess -# from leakpro.reporting.utils import get_config_name -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.metrics.attack_result import CombinedMetricResult, MIAResult import matplotlib.pyplot as plt -def load_mia_results(path: str, name: str): - with open(f'{path}/{name}_data') as f: - result_data = json.load(f) - return result_data - # Report Handler -class report_handler(): +class ReportHandler(): """Implementation of the report handler.""" def __init__(self, report_dir: str, logger:logging.Logger) -> None: self.logger = logger self.report_dir = report_dir - self.image_paths = [] + self.pdf_results = {} + self.leakpro_types = ["MIAResult", + "GIAResults", + "SyntheticResult" + ] + + # Initiate empty lists for the different types of LeakPro attack types + for key in self.leakpro_types: + self.pdf_results[key] = [] def save_results(self, attack_name: str, result_data: dict, config: 
dict) -> None: """Save attack results. """ @@ -41,8 +43,6 @@ def load_results(self): # Extract class name and data resulttype = data["resulttype"] - primitives = data["primitives"] - config = data["config"] # Dynamically get the class from its name (resulttype) # This assumes that the class is already defined in the current module or imported @@ -52,54 +52,20 @@ def load_results(self): raise ValueError(f"Class '{resulttype}' not found.") # Initialize the class using the saved primitives - instance = cls( - predicted_labels=np.array(primitives["predicted_labels"]), - true_labels=np.array(primitives["true_labels"]), - predictions_proba=np.array(primitives["predictions_proba"]) if primitives["predictions_proba"] is not None else None, - signal_values=np.array(primitives["signal_values"]) if primitives["signal_values"] is not None else None, - threshold=np.array(primitives["threshold"]) if primitives["threshold"] is not None else None, - ) - instance.config = config - instance.id = subdir.name - instance.resultname = parentdir.name + instance = cls(load=True) + instance.load(data) + + if instance.id is None: + instance.id = subdir.name + + if instance.resultname is None: + instance.resultname = parentdir.name + self.results.append(instance) except Exception as e: self.logger.info(f"Not able to load data, Error: {e}") - def _plot_merged_results( - self, - merged_results, - title = "ROC curve", - save_name = "", - ): - - filename = f"{self.report_dir}/{save_name}" - - for res in merged_results: - - fpr = res.fp / (res.fp + res.tn) - tpr = res.tp / (res.tp + res.fn) - - range01 = np.linspace(0, 1) - plt.fill_between(fpr, tpr, alpha=0.15) - plt.plot(fpr, tpr, label=res.id) - - plt.plot(range01, range01, "--", label="Random guess") - plt.yscale("log") - plt.xscale("log") - plt.tight_layout() - plt.grid() - plt.legend() - plt.xlabel("False positive rate (FPR)") - plt.ylabel("True positive rate (TPR)") - plt.title(title) - plt.xlim(left=1e-5) - plt.ylim(bottom=1e-5) - 
plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches='tight') - plt.clf() - return filename - def _get_results_of_name(self, results, resultname_value) -> list: indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] return [results[idx] for idx in indices] @@ -112,66 +78,126 @@ def _get_all_attacknames(self): return attack_name_list def create_results_all(self) -> None: - names = self._plot_merged_results(merged_results=self.results, save_name="all_results") - self.image_paths.append(names) - pass - - def get_strongest(self, results) -> list: - return max((res for res in results), key=lambda d: d.roc_auc) + for result_type in self.leakpro_types: + try: + # Get all results of type "Result" + results = [res for res in self.results if res.resulttype == result_type] + + # If no results of type "result_type" is found, skip to next result_type + if len(results) == 0: + self.logger.info(f"No results of type {result_type} found.") + continue + + # Create all results + merged_result = results[0].create_results(results=results, save_dir=self.report_dir, save_name="all_results") + self.pdf_results[result_type].append(merged_result) + + except Exception as e: + print("all", e) def create_results_strong(self): - attack_name_grouped_results = [self._get_results_of_name(self.results, name) for name in self._get_all_attacknames()] - strongest_results = [self.get_strongest(attack_name) for attack_name in attack_name_grouped_results] - names = self._plot_merged_results(merged_results=strongest_results, save_name="strongest_attacks") - self.image_paths.append(names) - pass + for result_type in self.leakpro_types: + try: + # Get all results of type "Result" + results = [res for res in self.results if res.resulttype == result_type] + + # If no results of type "result_type" is found, skip to next result_type + if len(results) == 0: + self.logger.info(f"No \'strong\' results of type {result_type} found.") + continue + + # Get all attack 
names + attack_name_grouped_results = [self._get_results_of_name(results, name) for name in self._get_all_attacknames()] + + # Get the strongest result for each attack name + strongest_results = [result[0].get_strongest(result) for result in attack_name_grouped_results] + + # Create the strongest results + merged_result = results[0].create_results(results=strongest_results, save_dir=self.report_dir, save_name="strong_results") + self.pdf_results[result_type].append(merged_result) + + except Exception as e: + print("results_strong", e) def create_results_attackname_grouped(self): + # Get all attack names all_attack_names = self._get_all_attacknames() - print(all_attack_names) - for name in all_attack_names: - attack_results = self._get_results_of_name(self.results, name) - names = self._plot_merged_results(merged_results=attack_results, save_name="all_"+name) - self.image_paths.append(names) - pass - # TODO: Make other useful groupings of results - def create_results_numshadowmodels(self): - pass + for result_type in self.leakpro_types: + + # Get all results of type "Result" + results = [res for res in self.results if res.resulttype == result_type] + + # If no results of type "result_type" is found, skip to next result_type + if len(results) == 0: + self.logger.info(f"No results of type {result_type} found.") + continue + + for name in all_attack_names: + + try: + # Get result for each attack names + attack_results = self._get_results_of_name(results, name) + + # Create results + merged_result = attack_results[0].create_results(results=attack_results, save_dir=self.report_dir, save_name="grouped_"+name) + self.pdf_results[result_type].append(merged_result) + + except Exception as e: + print("create_results_attackname_grouped", e) def create_report(self): + """Method to create PDF report""" + + # Create initial part of the document. 
self._init_pdf() - for image in self.image_paths: - self._append_to_pdf(image_path=image) + # Append all results to the document + for result_type in self.leakpro_types: + if len(self.pdf_results[result_type]) > 0: + self.latex_content += f"""\\section{{{result_type}}}""" + for res in self.pdf_results[result_type]: + self.latex_content += res + # Compile the PDF self._compile_pdf() - pass def _init_pdf(self,): self.latex_content = f""" \\documentclass{{article}} + \\usepackage{{tabularx}} \\usepackage{{graphicx}} - + \\usepackage{{graphics}} \\begin{{document}} """ - pass - def _append_to_pdf(self, image_path=None, table=None): - self.latex_content += f""" - \\begin{{figure}}[ht] - \\includegraphics[width=0.9\\textwidth]{{{image_path}.png}} - \\end{{figure}} - """ - pass + def _compile_pdf(self, install_flag: bool = False): + """Method to compile PDF.""" - def _compile_pdf(self): self.latex_content += f""" \\end{{document}} """ with open(f'{self.report_dir}/LeakPro_output.tex', 'w') as f: f.write(self.latex_content) - cmd = ['pdflatex', '-interaction', 'nonstopmode', f'{self.report_dir}/LeakPro_output.tex'] - proc = subprocess.Popen(cmd) - pass \ No newline at end of file + # Check if pdflatex is installed + try: + check = subprocess.check_output(["which", "pdflatex"], universal_newlines=True) + assert "pdflatex" in check + except: + # Option to install pdflatex + self.logger.info("Could not find pdflatex installed\nPlease install pdflatex with \"apt install texlive-latex-base\"") + choice = input("Do you want to install pdflatex? 
(Y/n): ").lower() + if (choice in {"y", "yes"} or install_flag==True): + proc = subprocess.Popen(["apt", "install", "-y", "texlive-latex-base"], stdout=subprocess.DEVNULL) + proc.communicate() + + # Compile PDF if possible + try: + cmd = ['pdflatex', '-interaction', 'nonstopmode', f'{self.report_dir}/LeakPro_output.tex'] + proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL) + proc.communicate() + self.logger.info("PDF compiled") + except Exception as e: + print(e) + self.logger.info("Could not compile PDF") \ No newline at end of file diff --git a/tests/test_attack_result.py/__init__.py b/tests/test_attack_result.py/__init__.py new file mode 100644 index 00000000..c4bfdc70 --- /dev/null +++ b/tests/test_attack_result.py/__init__.py @@ -0,0 +1 @@ +"""Init file for attack result tests""" \ No newline at end of file diff --git a/tests/test_attack_result.py/test_attack_result.py b/tests/test_attack_result.py/test_attack_result.py new file mode 100644 index 00000000..dab9ce27 --- /dev/null +++ b/tests/test_attack_result.py/test_attack_result.py @@ -0,0 +1,149 @@ +import unittest +import os +import json +import logging +import subprocess +import tempfile +from unittest.mock import MagicMock, patch, mock_open, call +from leakpro.metrics.attack_result import * + +class TestMIAResult(unittest.TestCase): + + def setUp(self) -> None: + """Set up temporary directory and logger for MIAResult.""" + self.temp_dir = tempfile.TemporaryDirectory() + + predicted_labels = np.array([[False, False, False, False, False, False], + [False, False, False, False, False, False], + [False, False, True, False, False, False], + [False, True, True, False, True, False], + [ True, True, True, True, True, True], + [ True, True, True, True, True, True], + [ True, True, True, True, True, True], + [ True, True, True, True, True, True], + [ True, True, True, True, True, True], + [ True, True, True, True, True, True]]) + true_labels = np.ones((6)) + signal_values = np.array([[-0.00614866], + 
[-0.45619705], + [-2.30781003], + [ 0.46973035], + [-0.1584589 ], + [ 0.14289466]]) + + + predictions_proba = None + threshold = None + audit_indices = np.array([0, 1, 2, 3]) + resultname = None + id = None + + self.miaresult = MIAResult(predicted_labels = predicted_labels, + true_labels = true_labels, + signal_values = signal_values, + predictions_proba = predictions_proba, + threshold = threshold, + audit_indices = audit_indices, + resultname = resultname, + id = id) + + + + self.config = {'random_seed': 1234, 'attack_list': + {'lira': + {'training_data_fraction': 0.5, + 'num_shadow_models': 3, + 'online': True} + }, + 'report_log': + './leakpro_output/results', + 'config_log': + './leakpro_output/config', + 'target_model_folder': + './target', + 'attack_folder': + 'attack_objects', + 'attack_type': + 'mia', + 'split_method': + 'no_overlapping' + } + + def tearDown(self) -> None: + """Clean up temporary directory.""" + self.temp_dir.cleanup() + + def test_MIAResult_init(self) -> None: + """Test the initialization of MIAResult.""" + assert self.miaresult.id == None + + def test_check_tpr_fpr(self): + assert np.allclose(self.miaresult.tpr, np.array([0., 0., 0.16666667, 0.5, 1., 1., 1., 1., 1., 1.])) + assert self.miaresult.fp.all() == 0. + assert self.miaresult.tn.all() == 0. 
+ + def test_save_load_MIAResult(self) -> None: + + name = "lira" + config_name = get_config_name(self.config['attack_list'][name]) + save_path = f"{self.temp_dir}/{name}/{name}{config_name}" + + # Test saving + self.miaresult.save(self.temp_dir, name, self.config) + + assert os.path.isdir(save_path) + assert os.path.exists(f"{save_path}/data.json") + assert os.path.exists(f"{save_path}/ROC.png") + assert os.path.exists(f"{save_path}/SignalHistogram.png") + + # Test loading + with open(f"{save_path}/data.json") as f: + data = json.load(f) + + self.miaresult_new = MIAResult(load=True) + assert self.miaresult_new.predicted_labels == None + assert self.miaresult_new.true_labels == None + assert self.miaresult_new.signal_values == None + + self.miaresult_new.load(data) + assert np.allclose(self.miaresult_new.tpr, np.array([0., 0., 0.16666667, 0.5, 1., 1., 1., 1., 1., 1.])) + + def test_get_strongest_MIAResult(self) -> None: + """Test selecting the strongest attack based on ROC AUC.""" + result_1 = MagicMock(roc_auc=0.75) + result_2 = MagicMock(roc_auc=0.85) + result_3 = MagicMock(roc_auc=0.65) + + mia_result = MIAResult(load=True) + strongest = mia_result.get_strongest([result_1, result_2, result_3]) + + # The strongest attack should be the one with the highest ROC AUC + assert strongest == result_2 + + def test_latex(self): + """Test if the LaTeX content is generated correctly.""" + + result = [MagicMock(id="attack-config-1", resultname="test_attack_1", fixed_fpr_table={"TPR@1.0%FPR": 0.90, "TPR@0.1%FPR": 0.80, "TPR@0.01%FPR": 0.70, "TPR@0.0%FPR": 0.60})] + subsection = "attack_comparison" + filename = f"{self.temp_dir}/test.png" + + latex_content = MIAResult(load=True)._latex(result, subsection, filename) + + # Check that the subsection is correctly included + self.assertIn("\\subsection{attack comparison}", latex_content) + + # Check that the figure is correctly included + self.assertIn(f"\\includegraphics[width=0.8\\textwidth]{{{filename}.png}}", latex_content) + + 
# Check that the table header is correct + self.assertIn("Attack name & attack config & TPR: 1.0\\%FPR & 0.1\\%FPR & 0.01\\%FPR & 0.0\\%FPR", latex_content) + + # Check if the results for mock_result are included correctly + self.assertIn("test-attack-1", latex_content) + self.assertIn("0.9", latex_content) + self.assertIn("0.8", latex_content) + self.assertIn("0.7", latex_content) + self.assertIn("0.6", latex_content) + + # Ensure the LaTeX content ends properly + self.assertIn("\\newline\n", latex_content) \ No newline at end of file diff --git a/tests/test_report_handler/__init__.py b/tests/test_report_handler/__init__.py new file mode 100644 index 00000000..796d15a0 --- /dev/null +++ b/tests/test_report_handler/__init__.py @@ -0,0 +1 @@ +"""Init file for report handler tests""" \ No newline at end of file diff --git a/tests/test_report_handler/test_report_handler.py b/tests/test_report_handler/test_report_handler.py new file mode 100644 index 00000000..9ed6b952 --- /dev/null +++ b/tests/test_report_handler/test_report_handler.py @@ -0,0 +1,77 @@ +import unittest +import os +import json +import logging +import subprocess +import tempfile +from unittest.mock import MagicMock, patch, mock_open, call +from leakpro.reporting.report_handler import ReportHandler +from leakpro.metrics.attack_result import * + +class TestReportHandler(unittest.TestCase): + + def setUp(self) -> None: + """Set up temporary directory and logger for ReportHandler.""" + self.temp_dir = tempfile.TemporaryDirectory() + self.logger = logging.getLogger('test_logger') + self.logger.setLevel(logging.INFO) + self.report_handler = ReportHandler(report_dir=self.temp_dir.name, logger=self.logger) + + def tearDown(self) -> None: + """Clean up temporary directory.""" + self.temp_dir.cleanup() + + def test_report_handler_initialization(self) -> None: + """Test the initialization of ReportHandler.""" + assert self.report_handler is not None + assert self.report_handler.report_dir == self.temp_dir.name + 
assert isinstance(self.report_handler.logger, logging.Logger) + + types = ["MIAResult", "GIAResults", "SyntheticResult"] + assert False not in [_type in types for _type in self.report_handler.leakpro_types] + assert True not in [True if self.report_handler.pdf_results[key] else False for key in self.report_handler.leakpro_types] + assert False not in [_type in globals() for _type in types] + + def test_init_pdf(self) -> None: + assert hasattr(self.report_handler, 'latex_content') == False + + self.report_handler._init_pdf() + assert "documentclass" in self.report_handler.latex_content + assert "begin" in self.report_handler.latex_content + + def test_compile_pdf(self) -> None: + """Test PDF compilation.""" + + self.report_handler._init_pdf() + self.report_handler._compile_pdf(install_flag=True) + + assert "end" in self.report_handler.latex_content + assert os.path.isfile(f'{self.report_handler.report_dir}/LeakPro_output.tex') + assert os.path.isfile(f'./LeakPro_output.pdf') + + def test_get_all_attacknames(self) -> None: + """Test retrieval of all attack names.""" + result_mock_1 = MagicMock(resultname="Attack1") + result_mock_2 = MagicMock(resultname="Attack2") + self.report_handler.results = [result_mock_1, result_mock_2, result_mock_1] + + attack_names = self.report_handler._get_all_attacknames() + + assert attack_names == ["Attack1", "Attack2"] + + def test_get_results_of_name(self): + + """Test retrieval of all attack names.""" + result_mock_1 = MagicMock(resultname="Attack1") + result_mock_2 = MagicMock(resultname="Attack2") + result_mock_3 = MagicMock(resultname="Attack2") + result_mock_4 = MagicMock(resultname="Attack3") + result_mock_5 = MagicMock(resultname="Attack3") + result_mock_6 = MagicMock(resultname="Attack3") + + self.report_handler.results = [result_mock_1, result_mock_2, result_mock_3, + result_mock_4, result_mock_5, result_mock_6] + + assert len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack1")) == 1 + assert 
len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack2")) == 2 + assert len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack3")) == 3 \ No newline at end of file From cf3a3433a461a4dd953efcf0f6760a32b174dda4 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Tue, 22 Oct 2024 22:29:15 +0000 Subject: [PATCH 03/14] ruff check --fix --- examples/mia/tabular_mia/adult_handler.py | 9 +- examples/mia/tabular_mia/main.ipynb | 35 ++--- .../utils/adult_data_preparation.py | 45 +++--- .../utils/adult_model_preparation.py | 27 ++-- .../synthetic_data/anomalies_example.ipynb | 13 +- .../synthetic_data/inference_example.ipynb | 1 + .../synthetic_data/linkability_example.ipynb | 1 + .../synthetic_data/singling_out_example.ipynb | 1 + leakpro/attacks/mia_attacks/lira.py | 2 +- leakpro/metrics/attack_result.py | 130 +++++++++--------- leakpro/reporting/report_handler.py | 48 +++---- leakpro/reporting/utils.py | 2 +- leakpro/tests/conftest.py | 29 ++-- .../input_handler/image_input_handler.py | 3 +- leakpro/tests/input_handler/image_utils.py | 6 +- .../input_handler/tabular_input_handler.py | 9 +- leakpro/tests/input_handler/tabular_utils.py | 40 +++--- .../input_handler/test_tabular_handler.py | 14 +- .../tests/mia_attacks/attacks/test_lira.py | 49 +++---- .../utils/test_shadow_model_handler.py | 36 ++--- .../tests/test_attack_result}/__init__.py | 0 .../test_attack_result}/test_attack_result.py | 0 .../tests}/test_report_handler/__init__.py | 0 .../test_report_handler.py | 0 24 files changed, 255 insertions(+), 245 deletions(-) rename {tests/test_attack_result.py => leakpro/tests/test_attack_result}/__init__.py (100%) rename {tests/test_attack_result.py => leakpro/tests/test_attack_result}/test_attack_result.py (100%) rename {tests => leakpro/tests}/test_report_handler/__init__.py (100%) rename {tests => leakpro/tests}/test_report_handler/test_report_handler.py (100%) diff --git a/examples/mia/tabular_mia/adult_handler.py 
b/examples/mia/tabular_mia/adult_handler.py index cf03361c..5ee74387 100644 --- a/examples/mia/tabular_mia/adult_handler.py +++ b/examples/mia/tabular_mia/adult_handler.py @@ -8,6 +8,7 @@ from leakpro import AbstractInputHandler + class AdultInputHandler(AbstractInputHandler): """Class to handle the user input for the CIFAR10 dataset.""" @@ -41,11 +42,11 @@ def train( criterion = self.get_criterion() optimizer = self.get_optimizer(model) - + for e in tqdm(range(epochs), desc="Training Progress"): model.train() train_acc, train_loss = 0.0, 0.0 - + for data, target in dataloader: target = target.float().unsqueeze(1) data, target = data.to(dev, non_blocking=True), target.to(dev, non_blocking=True) @@ -55,11 +56,11 @@ def train( loss = criterion(output, target) pred = sigmoid(output) >= 0.5 train_acc += pred.eq(target).sum().item() - + loss.backward() optimizer.step() train_loss += loss.item() - + train_acc = train_acc/len(dataloader.dataset) train_loss = train_loss/len(dataloader) diff --git a/examples/mia/tabular_mia/main.ipynb b/examples/mia/tabular_mia/main.ipynb index 5ca090cf..96dbfb87 100644 --- a/examples/mia/tabular_mia/main.ipynb +++ b/examples/mia/tabular_mia/main.ipynb @@ -34,10 +34,13 @@ "project_root = os.path.abspath(os.path.join(os.getcwd(), \"../../..\"))\n", "sys.path.append(project_root)\n", "\n", - "from examples.mia.tabular_mia.utils.adult_data_preparation import preprocess_adult_dataset, get_adult_dataloaders, download_adult_dataset\n", + "from examples.mia.tabular_mia.utils.adult_data_preparation import (\n", + " download_adult_dataset,\n", + " get_adult_dataloaders,\n", + " preprocess_adult_dataset,\n", + ")\n", "from examples.mia.tabular_mia.utils.adult_model_preparation import AdultNet, create_trained_model_and_metadata\n", "\n", - "\n", "# Generate the dataset and dataloaders\n", "path = os.path.join(os.getcwd(), \"data/\")\n", "\n", @@ -52,9 +55,9 @@ "if not os.path.exists(\"target\"):\n", " os.makedirs(\"target\")\n", "model = 
AdultNet(input_size=n_features, hidden_size=64, num_classes=n_classes)\n", - "train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model, \n", - " train_loader, \n", - " test_loader, \n", + "train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model,\n", + " train_loader,\n", + " test_loader,\n", " epochs=10)" ] }, @@ -81,20 +84,20 @@ "plt.figure(figsize=(5, 4))\n", "\n", "plt.subplot(1, 2, 1)\n", - "plt.plot(train_acc, label='Train Accuracy')\n", - "plt.plot(test_acc, label='Test Accuracy')\n", - "plt.xlabel('Epoch')\n", - "plt.ylabel('Accuracy')\n", - "plt.title('Accuracy over Epochs')\n", + "plt.plot(train_acc, label=\"Train Accuracy\")\n", + "plt.plot(test_acc, label=\"Test Accuracy\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Accuracy\")\n", + "plt.title(\"Accuracy over Epochs\")\n", "plt.legend()\n", "\n", "# Plot training and test loss\n", "plt.subplot(1, 2, 2)\n", - "plt.plot(train_loss, label='Train Loss')\n", - "plt.plot(test_loss, label='Test Loss')\n", - "plt.xlabel('Epoch')\n", - "plt.ylabel('Loss')\n", - "plt.title('Loss over Epochs')\n", + "plt.plot(train_loss, label=\"Train Loss\")\n", + "plt.plot(test_loss, label=\"Test Loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Loss\")\n", + "plt.title(\"Loss over Epochs\")\n", "plt.legend()\n", "\n", "plt.tight_layout()\n", @@ -142,7 +145,7 @@ "# Prepare leakpro object\n", "leakpro = LeakPro(AdultInputHandler, config_path)\n", "\n", - "# Run the audit \n", + "# Run the audit\n", "leakpro.run_audit()" ] }, diff --git a/examples/mia/tabular_mia/utils/adult_data_preparation.py b/examples/mia/tabular_mia/utils/adult_data_preparation.py index 4cb92c1e..d553d034 100644 --- a/examples/mia/tabular_mia/utils/adult_data_preparation.py +++ b/examples/mia/tabular_mia/utils/adult_data_preparation.py @@ -1,25 +1,26 @@ import os +import pickle +import urllib.request + +import joblib import numpy as np import pandas as pd -import joblib -import pickle -from 
sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder from sklearn.model_selection import train_test_split -import urllib.request -from torch.utils.data import Dataset, Subset, DataLoader -from torch import tensor, float32 +from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler +from torch import float32, tensor +from torch.utils.data import DataLoader, Dataset, Subset class AdultDataset(Dataset): def __init__(self, x:tensor, y:tensor, dec_to_onehot:dict, one_hot_encoded:bool=True): self.x = x self.y = y - + # create dictionary to map between indices in categorical representation and one-hot encoded representation # For example: cols 1,2 continuous and col 3 categorical with 3 categories will be mapped to {1:1,2:2,3:[3,4,5]} self.dec_to_onehot = dec_to_onehot self.one_hot_encoded = one_hot_encoded - + def __len__(self): return len(self.y) @@ -28,8 +29,8 @@ def __getitem__(self, idx): def subset(self, indices): return AdultDataset(self.x[indices], self.y[indices], self.dec_to_onehot, self.one_hot_encoded) - - + + def download_adult_dataset(data_dir): """Download the Adult Dataset if it's not present.""" # URLs for the dataset @@ -54,22 +55,22 @@ def download_adult_dataset(data_dir): def preprocess_adult_dataset(path): """Get the dataset, download it if necessary, and store it.""" - + if os.path.exists(os.path.join(path, "adult_data.pkl")): with open(os.path.join(path, "adult_data.pkl"), "rb") as f: dataset = joblib.load(f) - else: + else: column_names = [ - "age", "workclass", "fnlwgt", "education", "education-num", + "age", "workclass", "fnlwgt", "education", "education-num", "marital-status", "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss", "hours-per-week", "native-country", "income", ] - + # Load and clean data df_train = pd.read_csv(os.path.join(path, "adult.data"), names=column_names) df_test = pd.read_csv(os.path.join(path, "adult.test"), names=column_names, header=0) df_test["income"] = 
df_test["income"].str.replace(".", "", regex=False) - + df_concatenated = pd.concat([df_train, df_test], axis=0) df_clean = df_concatenated.replace(" ?", np.nan).dropna() @@ -83,19 +84,19 @@ def preprocess_adult_dataset(path): # Scaling numerical features scaler = StandardScaler() x_numerical = pd.DataFrame(scaler.fit_transform(x[numerical_features]), columns=numerical_features, index=x.index) - + # Label encode the categories one_hot_encoder = OneHotEncoder(sparse_output=False) x_categorical_one_hot = one_hot_encoder.fit_transform(x[categorical_features]) one_hot_feature_names = one_hot_encoder.get_feature_names_out(categorical_features) x_categorical_one_hot_df = pd.DataFrame(x_categorical_one_hot, columns=one_hot_feature_names, index=x.index) - + # Concatenate the numerical and one-hot encoded categorical features x_final = pd.concat([x_numerical, x_categorical_one_hot_df], axis=1) # Label encode the target variable y = pd.Series(LabelEncoder().fit_transform(y)) - + # Add numerical features to the dictionary dec_to_onehot_mapping = {} for i, feature in enumerate(numerical_features): @@ -115,11 +116,11 @@ def preprocess_adult_dataset(path): with open(f"{path}/adult_data.pkl", "wb") as file: pickle.dump(dataset, file) print(f"Save data to {path}.pkl") - + return dataset def get_adult_dataloaders(dataset, train_fraction=0.3, test_fraction=0.3): - + dataset_size = len(dataset) train_size = int(train_fraction * dataset_size) test_size = int(test_fraction * dataset_size) @@ -127,10 +128,10 @@ def get_adult_dataloaders(dataset, train_fraction=0.3, test_fraction=0.3): # Use sklearn's train_test_split to split into train and test indices selected_index = np.random.choice(np.arange(dataset_size), train_size + test_size, replace=False) train_indices, test_indices = train_test_split(selected_index, test_size=test_size) - + train_subset = Subset(dataset, train_indices) test_subset = Subset(dataset, test_indices) - + train_loader = DataLoader(train_subset, batch_size=128, 
shuffle=True) test_loader = DataLoader(test_subset, batch_size=128, shuffle=False) diff --git a/examples/mia/tabular_mia/utils/adult_model_preparation.py b/examples/mia/tabular_mia/utils/adult_model_preparation.py index a7af914f..3f58f2ac 100644 --- a/examples/mia/tabular_mia/utils/adult_model_preparation.py +++ b/examples/mia/tabular_mia/utils/adult_model_preparation.py @@ -1,8 +1,9 @@ -import torch.nn as nn -from torch import device, optim, cuda, no_grad, save, sigmoid import pickle + +from torch import cuda, device, nn, no_grad, optim, save, sigmoid from tqdm import tqdm + class AdultNet(nn.Module): def __init__(self, input_size, hidden_size, num_classes): super(AdultNet, self).__init__() @@ -13,7 +14,7 @@ def __init__(self, input_size, hidden_size, num_classes): self.relu = nn.ReLU() self.fc2 = nn.Linear(hidden_size, hidden_size) self.fc3 = nn.Linear(hidden_size, num_classes) - + def forward(self, x): out = self.fc1(x) out = self.relu(out) @@ -47,11 +48,11 @@ def create_trained_model_and_metadata(model, train_loader, test_loader, epochs = optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.8) train_losses, train_accuracies = [], [] test_losses, test_accuracies = [], [] - + for e in tqdm(range(epochs), desc="Training Progress"): model.train() train_acc, train_loss = 0.0, 0.0 - + for data, target in train_loader: target = target.float().unsqueeze(1) data, target = data.to(device_name, non_blocking=True), target.to(device_name, non_blocking=True) @@ -61,17 +62,17 @@ def create_trained_model_and_metadata(model, train_loader, test_loader, epochs = loss = criterion(output, target) pred = sigmoid(output) >= 0.5 train_acc += pred.eq(target).sum().item() - + loss.backward() optimizer.step() train_loss += loss.item() - + train_loss /= len(train_loader) train_acc /= len(train_loader.dataset) - + train_losses.append(train_loss) train_accuracies.append(train_acc) - + test_loss, test_acc = evaluate(model, test_loader, criterion, device_name) 
test_losses.append(test_loss) test_accuracies.append(test_acc) @@ -86,12 +87,12 @@ def create_trained_model_and_metadata(model, train_loader, test_loader, epochs = meta_data["train_indices"] = train_loader.dataset.indices meta_data["test_indices"] = test_loader.dataset.indices meta_data["num_train"] = len(meta_data["train_indices"]) - + # Write init params meta_data["init_params"] = {} for key, value in model.init_params.items(): meta_data["init_params"][key] = value - + # read out optimizer parameters meta_data["optimizer"] = {} meta_data["optimizer"]["name"] = optimizer.__class__.__name__.lower() @@ -112,8 +113,8 @@ def create_trained_model_and_metadata(model, train_loader, test_loader, epochs = meta_data["train_loss"] = train_loss meta_data["test_loss"] = test_loss meta_data["dataset"] = "adult" - + with open("target/model_metadata.pkl", "wb") as f: pickle.dump(meta_data, f) - + return train_accuracies, train_losses, test_accuracies, test_losses diff --git a/examples/synthetic_data/anomalies_example.ipynb b/examples/synthetic_data/anomalies_example.ipynb index 5e435ce0..249958b5 100644 --- a/examples/synthetic_data/anomalies_example.ipynb +++ b/examples/synthetic_data/anomalies_example.ipynb @@ -24,16 +24,17 @@ "outputs": [], "source": [ "import os\n", + "import sys\n", + "\n", "import pandas as pd\n", "\n", - "import sys\n", "sys.path.append(\"../..\")\n", "\n", + "from leakpro.synthetic_data_attacks import plots\n", "from leakpro.synthetic_data_attacks.anomalies import return_anomalies\n", - "from leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", "from leakpro.synthetic_data_attacks.inference_utils import inference_risk_evaluation\n", + "from leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", "from leakpro.synthetic_data_attacks.singling_out_utils import singling_out_risk_evaluation\n", - "from leakpro.synthetic_data_attacks import plots\n", "\n", "#Get ori and syn\n", "n_samples = 
100\n", @@ -98,7 +99,7 @@ } ], "source": [ - "print('Syn anom shape',syn_anom.shape)" + "print(\"Syn anom shape\",syn_anom.shape)" ] }, { @@ -128,7 +129,7 @@ ], "source": [ "sin_out_res = singling_out_risk_evaluation(\n", - " dataset = 'adults',\n", + " dataset = \"adults\",\n", " ori = ori,\n", " syn = syn_anom,\n", " n_attacks = syn_anom.shape[0]\n", @@ -199,7 +200,7 @@ ], "source": [ "inf_res = inference_risk_evaluation(\n", - " dataset = 'adults',\n", + " dataset = \"adults\",\n", " ori = ori,\n", " syn = syn_anom,\n", " worst_case_flag = True,\n", diff --git a/examples/synthetic_data/inference_example.ipynb b/examples/synthetic_data/inference_example.ipynb index 2904789e..bb7f0498 100644 --- a/examples/synthetic_data/inference_example.ipynb +++ b/examples/synthetic_data/inference_example.ipynb @@ -17,6 +17,7 @@ "source": [ "import os\n", "import sys\n", + "\n", "import pandas as pd\n", "\n", "sys.path.append(\"../..\")\n", diff --git a/examples/synthetic_data/linkability_example.ipynb b/examples/synthetic_data/linkability_example.ipynb index 48337a1c..2d2bf576 100644 --- a/examples/synthetic_data/linkability_example.ipynb +++ b/examples/synthetic_data/linkability_example.ipynb @@ -17,6 +17,7 @@ "source": [ "import os\n", "import sys\n", + "\n", "import pandas as pd\n", "\n", "sys.path.append(\"../..\")\n", diff --git a/examples/synthetic_data/singling_out_example.ipynb b/examples/synthetic_data/singling_out_example.ipynb index b5337016..7413df53 100644 --- a/examples/synthetic_data/singling_out_example.ipynb +++ b/examples/synthetic_data/singling_out_example.ipynb @@ -17,6 +17,7 @@ "source": [ "import os\n", "import sys\n", + "\n", "import pandas as pd\n", "\n", "sys.path.append(\"../..\")\n", diff --git a/leakpro/attacks/mia_attacks/lira.py b/leakpro/attacks/mia_attacks/lira.py index 5a243863..de1edd7c 100644 --- a/leakpro/attacks/mia_attacks/lira.py +++ b/leakpro/attacks/mia_attacks/lira.py @@ -316,7 +316,7 @@ def run_attack(self:Self) -> 
CombinedMetricResult: return MIAResult( predicted_labels=predictions, true_labels=true_labels, - predictions_proba=None, + predictions_proba=None, signal_values=signal_values, audit_indices=self.audit_dataset["data"], ) diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index 8d20da2e..dfa8ad90 100644 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -1,10 +1,11 @@ """Contains the AttackResult class, which stores the results of an attack.""" -from collections import defaultdict -import os import json -import numpy as np +import os +from collections import defaultdict + import matplotlib.pyplot as plt +import numpy as np import pandas as pd import seaborn as sn from sklearn.metrics import ( @@ -104,7 +105,7 @@ def __init__( # noqa: PLR0913 threshold: Threshold computed by the metric. """ - # TODO REDIFINE THE CLASS SO IT DOSE NOT STORE MATRICIES BUT VECTORS + # TODO REDIFINE THE CLASS SO IT DOSE NOT STORE MATRICIES BUT VECTORS self.predicted_labels = predicted_labels self.true_labels = true_labels @@ -133,13 +134,13 @@ def __init__( # noqa: PLR0913 def _get_primitives(self:Self): """Return the primitives of the CombinedMetricResult class.""" - return {"predicted_labels": self.predicted_labels.tolist(), + return {"predicted_labels": self.predicted_labels.tolist(), "true_labels": self.true_labels.tolist(), - "predictions_proba": self.predictions_proba.tolist() if isinstance(self.predictions_proba, np.ndarray) else None, + "predictions_proba": self.predictions_proba.tolist() if isinstance(self.predictions_proba, np.ndarray) else None, "signal_values": self.signal_values.tolist() if isinstance(self.signal_values, np.ndarray) else None, "threshold": self.threshold.tolist() if isinstance(self.threshold, np.ndarray) else None, } - + def save(self:Self, path: str, name: str, config:dict): """Save the CombinedMetricResult class to disk.""" @@ -157,11 +158,11 @@ def save(self:Self, path: str, name: str, 
config:dict): config_name = get_config_name(config["attack_list"][name]) # Check if path exists, otherwise create it. - if not os.path.exists(f'{path}/{name}/{name}{config_name}'): - os.makedirs(f'{path}/{name}/{name}{config_name}') + if not os.path.exists(f"{path}/{name}/{name}{config_name}"): + os.makedirs(f"{path}/{name}/{name}{config_name}") # Save the results to a file - with open(f'{path}/{name}/{name}{config_name}/data.json', 'w') as f: + with open(f"{path}/{name}/{name}{config_name}/data.json", "w") as f: json.dump(data, f) def __str__(self:Self) -> str: @@ -216,7 +217,7 @@ def __init__( # noqa: PLR0913 self.metadata = metadata self.resultname = resultname self.id = id - + if load: return @@ -248,7 +249,7 @@ def load(self, data): def save(self:Self, path: str, name: str, config:dict = None): """Save the MIAResults to disk.""" - + print(config) result_config = config["attack_list"][name] @@ -281,11 +282,11 @@ def save(self:Self, path: str, name: str, config:dict = None): os.makedirs(save_path) # Save the results to a file - with open(f'{save_path}/data.json', 'w') as f: + with open(f"{save_path}/data.json", "w") as f: json.dump(data, f) # Create ROC plot for MIAResult - filename = f'{save_path}/ROC' + filename = f"{save_path}/ROC" temp_res = MIAResult(load=True) temp_res.tpr = self.tpr temp_res.fpr = self.fpr @@ -295,17 +296,17 @@ def save(self:Self, path: str, name: str, config:dict = None): ) # Create SignalHistogram plot for MIAResult - filename = f'{save_path}/SignalHistogram.png' + filename = f"{save_path}/SignalHistogram.png" self.create_signal_histogram(filename = filename, signal_values = self.signal_values, true_labels = self.true_labels, threshold = self.threshold ) - + def get_strongest(self, results) -> list: """Method for selecting the strongest attack.""" return max((res for res in results), key=lambda d: d.roc_auc) - + def create_signal_histogram(self, filename, signal_values, true_labels, threshold) -> None: values = 
np.array(signal_values).ravel() @@ -347,7 +348,7 @@ def create_signal_histogram(self, filename, signal_values, true_labels, threshol plt.title("Signal histogram") plt.savefig(fname=filename, dpi=1000) plt.clf() - + def create_plot(self, results, filename = "", save_name = "") -> None: # Create plot for results @@ -356,11 +357,11 @@ def create_plot(self, results, filename = "", save_name = "") -> None: plt.fill_between(res.fpr, res.tpr, alpha=0.15) plt.plot(res.fpr, res.tpr, label=label) - + # Plot random guesses range01 = np.linspace(0, 1) plt.plot(range01, range01, "--", label="Random guess") - + # Set plot parameters plt.yscale("log") plt.xscale("log") @@ -368,12 +369,12 @@ def create_plot(self, results, filename = "", save_name = "") -> None: plt.ylim(bottom=1e-5) plt.tight_layout() plt.grid() - plt.legend(bbox_to_anchor =(0.5,-0.27), loc='lower center') + plt.legend(bbox_to_anchor =(0.5,-0.27), loc="lower center") plt.xlabel("False positive rate (FPR)") plt.ylabel("True positive rate (TPR)") plt.title(save_name+"ROC Curve") - plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches='tight') + plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches="tight") plt.clf() def create_results( @@ -388,7 +389,7 @@ def create_results( self.create_plot(results, filename, save_name) return self._latex(results, save_name, filename) - + def _latex(self, results, subsection, filename): """Latex method for MIAResult.""" @@ -400,12 +401,12 @@ def _latex(self, results, subsection, filename): \\end{{figure}} """ - latex_content += f''' - \\resizebox{{\\linewidth}}{{!}}{{% - \\begin{{tabularx}}{{\\textwidth}}{{l c l l l l}} + latex_content += """ + \\resizebox{\\linewidth}{!}{% + \\begin{tabularx}{\\textwidth}{l c l l l l} Attack name & attack config & TPR: 1.0\\%FPR & 0.1\\%FPR & 0.01\\%FPR & 0.0\\%FPR \\\\ \\hline - ''' + """ def config_latex_style(config): config = " \\\\ ".join(config.split("-")[1:]) @@ -414,11 +415,11 @@ def config_latex_style(config): for res in 
results: config = config_latex_style(res.id) - latex_content += f'''{"-".join(res.resultname.split("_"))} & {config} & {res.fixed_fpr_table["TPR@1.0%FPR"]} & {res.fixed_fpr_table["TPR@0.1%FPR"]} & {res.fixed_fpr_table["TPR@0.01%FPR"]} & {res.fixed_fpr_table["TPR@0.0%FPR"]} \\\\ \\hline - ''' - latex_content += f""" - \\end{{tabularx}} - }} + latex_content += f"""{"-".join(res.resultname.split("_"))} & {config} & {res.fixed_fpr_table["TPR@1.0%FPR"]} & {res.fixed_fpr_table["TPR@0.1%FPR"]} & {res.fixed_fpr_table["TPR@0.01%FPR"]} & {res.fixed_fpr_table["TPR@0.0%FPR"]} \\\\ \\hline + """ + latex_content += """ + \\end{tabularx} + } \\newline """ return latex_content @@ -486,11 +487,11 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: } # Check if path exists, otherwise create it. - if not os.path.exists(f'{save_path}'): - os.makedirs(f'{save_path}') + if not os.path.exists(f"{save_path}"): + os.makedirs(f"{save_path}") # Save the results to a file - with open(f'{save_path}/data.json', 'w') as f: + with open(f"{save_path}/data.json", "w") as f: json.dump(data, f) pass @@ -526,6 +527,7 @@ def __init__( # noqa: PLR0913 Args: ---- + """ # Initialize values to result object # self.values = values @@ -555,16 +557,16 @@ def save(self:Self, path: str, name: str, config:dict = None): # Get the name for the attack configuration config_name = get_config_name(result_config) self.id = f"{name}{config_name}" - save_path = f'{path}/{name}/{self.id}' + save_path = f"{path}/{name}/{self.id}" # Check if path exists, otherwise create it. - if not os.path.exists(f'{save_path}'): - os.makedirs(f'{save_path}') + if not os.path.exists(f"{save_path}"): + os.makedirs(f"{save_path}") # Save the results to a file - with open(f'{save_path}/data.json', 'w') as f: + with open(f"{save_path}/data.json", "w") as f: json.dump(data, f) - + class TEMPLATEResult: """Contains results related to the performance of the metric. 
It contains the results for multiple fpr.""" @@ -576,6 +578,7 @@ def __init__( # noqa: PLR0913 Args: ---- + """ # Initialize values to result object # self.values = values @@ -607,13 +610,13 @@ def save(self:Self, path: str, name: str, config:dict = None): self.id = f"{name}{config_name}" # Check if path exists, otherwise create it. - if not os.path.exists(f'{path}/{name}/{self.id}'): - os.makedirs(f'{path}/{name}/{self.id}') + if not os.path.exists(f"{path}/{name}/{self.id}"): + os.makedirs(f"{path}/{name}/{self.id}") # Save the results to a file - with open(f'{path}/{name}/{self.id}/data.json', 'w') as f: + with open(f"{path}/{name}/{self.id}/data.json", "w") as f: json.dump(data, f) - + def create_result(self, results): """Method for results.""" def _latex(results): @@ -629,7 +632,7 @@ def _latex(results): pass def get_result_fixed_fpr(fpr, tpr): - + # Function to find TPR at given FPR thresholds def find_tpr_at_fpr(fpr_array:np.ndarray, tpr_array:np.ndarray, threshold:float): #-> Optional[str]: try: @@ -655,11 +658,10 @@ def get_config_name(config): for key, value in zip(list(config.keys()), list(config.values())): if key in exclude: pass + elif type(value) is bool: + config_name += f"-{key}" else: - if type(value) is bool: - config_name += f"-{key}" - else: - config_name += f"-{key}={value}" + config_name += f"-{key}={value}" return config_name def reduce_to_unique_labels(results): @@ -668,43 +670,43 @@ def reduce_to_unique_labels(results): # Dictionary to store name as key and a list of configurations as value name_configs = defaultdict(list) - + # Parse each string and store configurations for s in strings: - parts = s.split('-') + parts = s.split("-") name = parts[0] # The first part is the name - config = '-'.join(parts[1:]) if len(parts) > 1 else '' # The rest is the configuration + config = "-".join(parts[1:]) if len(parts) > 1 else "" # The rest is the configuration name_configs[name].append(config) # Store the configuration under the name - + def 
find_common_suffix(configs): """Helper function to find the common suffix among multiple configurations""" if not configs: - return '' - + return "" + # Split each configuration by "-" and zip them in reverse to compare backwards - reversed_configs = [config.split('-')[::-1] for config in configs] + reversed_configs = [config.split("-")[::-1] for config in configs] common_suffix = [] - + for elements in zip(*reversed_configs): if all(e == elements[0] for e in elements): common_suffix.append(elements[0]) else: break - + # Return the common suffix as a string, reversed back to normal order - return '-'.join(common_suffix[::-1]) - + return "-".join(common_suffix[::-1]) + result = [] - + # Process each name and its configurations for name, configs in name_configs.items(): if len(configs) > 1: # Find the common suffix for the configurations common_suffix = find_common_suffix(configs) - + # Remove the common suffix from each configuration trimmed_configs = [config[:-(len(common_suffix) + 1)] if common_suffix and config.endswith(common_suffix) else config for config in configs] - + # Process configurations based on whether they share the same pattern for config in trimmed_configs: if config: @@ -714,7 +716,7 @@ def find_common_suffix(configs): else: # If only one configuration, just return the string as is result.append(f"{name}") - + return result diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index 067c3202..340d57f3 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -1,12 +1,8 @@ import json import logging -import numpy as np import os import subprocess -from leakpro.metrics.attack_result import CombinedMetricResult, MIAResult - -import matplotlib.pyplot as plt # Report Handler class ReportHandler(): @@ -20,15 +16,15 @@ def __init__(self, report_dir: str, logger:logging.Logger) -> None: "GIAResults", "SyntheticResult" ] - + # Initiate empty lists for the different types of LeakPro attack 
types for key in self.leakpro_types: self.pdf_results[key] = [] def save_results(self, attack_name: str, result_data: dict, config: dict) -> None: - """Save attack results. """ - - self.logger.info(f'Saving results for {attack_name}') + """Save attack results.""" + + self.logger.info(f"Saving results for {attack_name}") result_data.save(self.report_dir, attack_name, config) def load_results(self): @@ -50,7 +46,7 @@ def load_results(self): cls = globals()[resulttype] else: raise ValueError(f"Class '{resulttype}' not found.") - + # Initialize the class using the saved primitives instance = cls(load=True) instance.load(data) @@ -69,7 +65,7 @@ def load_results(self): def _get_results_of_name(self, results, resultname_value) -> list: indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] return [results[idx] for idx in indices] - + def _get_all_attacknames(self): attack_name_list = [] for result in self.results: @@ -103,11 +99,11 @@ def create_results_strong(self): # If no results of type "result_type" is found, skip to next result_type if len(results) == 0: - self.logger.info(f"No \'strong\' results of type {result_type} found.") + self.logger.info(f"No 'strong' results of type {result_type} found.") continue # Get all attack names - attack_name_grouped_results = [self._get_results_of_name(results, name) for name in self._get_all_attacknames()] + attack_name_grouped_results = [self._get_results_of_name(results, name) for name in self._get_all_attacknames()] # Get the strongest result for each attack name strongest_results = [result[0].get_strongest(result) for result in attack_name_grouped_results] @@ -132,9 +128,9 @@ def create_results_attackname_grouped(self): if len(results) == 0: self.logger.info(f"No results of type {result_type} found.") continue - + for name in all_attack_names: - + try: # Get result for each attack names attack_results = self._get_results_of_name(results, name) @@ -163,21 +159,21 @@ def 
create_report(self): self._compile_pdf() def _init_pdf(self,): - self.latex_content = f""" - \\documentclass{{article}} - \\usepackage{{tabularx}} - \\usepackage{{graphicx}} - \\usepackage{{graphics}} - \\begin{{document}} + self.latex_content = """ + \\documentclass{article} + \\usepackage{tabularx} + \\usepackage{graphicx} + \\usepackage{graphics} + \\begin{document} """ def _compile_pdf(self, install_flag: bool = False): """Method to compile PDF.""" - self.latex_content += f""" - \\end{{document}} + self.latex_content += """ + \\end{document} """ - with open(f'{self.report_dir}/LeakPro_output.tex', 'w') as f: + with open(f"{self.report_dir}/LeakPro_output.tex", "w") as f: f.write(self.latex_content) # Check if pdflatex is installed @@ -186,7 +182,7 @@ def _compile_pdf(self, install_flag: bool = False): assert "pdflatex" in check except: # Option to install pdflatex - self.logger.info("Could not find pdflatex installed\nPlease install pdflatex with \"apt install texlive-latex-base\"") + self.logger.info('Could not find pdflatex installed\nPlease install pdflatex with "apt install texlive-latex-base"') choice = input("Do you want to install pdflatex? 
(Y/n): ").lower() if (choice in {"y", "yes"} or install_flag==True): proc = subprocess.Popen(["apt", "install", "-y", "texlive-latex-base"], stdout=subprocess.DEVNULL) @@ -194,10 +190,10 @@ def _compile_pdf(self, install_flag: bool = False): # Compile PDF if possible try: - cmd = ['pdflatex', '-interaction', 'nonstopmode', f'{self.report_dir}/LeakPro_output.tex'] + cmd = ["pdflatex", "-interaction", "nonstopmode", f"{self.report_dir}/LeakPro_output.tex"] proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL) proc.communicate() self.logger.info("PDF compiled") except Exception as e: print(e) - self.logger.info("Could not compile PDF") \ No newline at end of file + self.logger.info("Could not compile PDF") diff --git a/leakpro/reporting/utils.py b/leakpro/reporting/utils.py index b078c000..9573d272 100644 --- a/leakpro/reporting/utils.py +++ b/leakpro/reporting/utils.py @@ -42,4 +42,4 @@ def prepare_privacy_risk_report( metric_result=audit_results, save=True, filename=f"{save_path}/Histogram.png", - ) \ No newline at end of file + ) diff --git a/leakpro/tests/conftest.py b/leakpro/tests/conftest.py index cf2c7168..d70b2f71 100644 --- a/leakpro/tests/conftest.py +++ b/leakpro/tests/conftest.py @@ -2,35 +2,34 @@ import os import shutil -from typing import Generator import pytest import yaml from dotmap import DotMap from leakpro import LeakPro -from leakpro.tests.input_handler.image_utils import setup_image_test +from leakpro.tests.constants import STORAGE_PATH, get_audit_config from leakpro.tests.input_handler.image_input_handler import ImageInputHandler +from leakpro.tests.input_handler.image_utils import setup_image_test from leakpro.tests.input_handler.tabular_input_handler import TabularInputHandler from leakpro.tests.input_handler.tabular_utils import setup_tabular_test -from leakpro.tests.constants import STORAGE_PATH, get_audit_config @pytest.fixture(scope="session") def manage_storage_directory(): """Fixture to create and remove the storage directory.""" - + 
# Setup: Create the folder at the start of the test session os.makedirs(STORAGE_PATH, exist_ok=True) - + # Yield control back to the test session yield - + # Teardown: Remove the folder and its contents at the end of the session if os.path.exists(STORAGE_PATH): shutil.rmtree(STORAGE_PATH) -@pytest.fixture() -def image_handler(manage_storage_directory) -> Generator[ImageInputHandler, None, None]: +@pytest.fixture +def image_handler(manage_storage_directory) -> ImageInputHandler: """Fixture for the image input handler to be shared between many tests.""" config = DotMap() @@ -41,16 +40,16 @@ def image_handler(manage_storage_directory) -> Generator[ImageInputHandler, None config_path = f"{STORAGE_PATH}/image_test_config.yaml" with open(config_path, "w") as f: yaml.dump(config.toDict(), f) - + leakpro = LeakPro(ImageInputHandler, config_path) handler = leakpro.handler handler.configs = DotMap(handler.configs) # Yield control back to the test session - yield handler - -@pytest.fixture() -def tabular_handler(manage_storage_directory) -> Generator[TabularInputHandler, None, None]: + return handler + +@pytest.fixture +def tabular_handler(manage_storage_directory) -> TabularInputHandler: """Fixture for the image input handler to be shared between many tests.""" config = DotMap() @@ -61,10 +60,10 @@ def tabular_handler(manage_storage_directory) -> Generator[TabularInputHandler, config_path = f"{STORAGE_PATH}/tabular_test_config.yaml" with open(config_path, "w") as f: yaml.dump(config.toDict(), f) - + leakpro = LeakPro(TabularInputHandler, config_path) handler = leakpro.handler handler.configs = DotMap(handler.configs) # Yield control back to the test session - yield handler + return handler diff --git a/leakpro/tests/input_handler/image_input_handler.py b/leakpro/tests/input_handler/image_input_handler.py index a2fe2f32..3df37d1c 100644 --- a/leakpro/tests/input_handler/image_input_handler.py +++ b/leakpro/tests/input_handler/image_input_handler.py @@ -5,10 +5,11 @@ from 
torch.utils.data import DataLoader from tqdm import tqdm -from leakpro.utils.import_helper import Self from leakpro.input_handler.abstract_input_handler import AbstractInputHandler +from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger + class ImageInputHandler(AbstractInputHandler): """Class to handle the user input for the CIFAR10 dataset.""" diff --git a/leakpro/tests/input_handler/image_utils.py b/leakpro/tests/input_handler/image_utils.py index 9c1c3859..9beb14c1 100644 --- a/leakpro/tests/input_handler/image_utils.py +++ b/leakpro/tests/input_handler/image_utils.py @@ -10,9 +10,9 @@ from torch.utils.data import TensorDataset from torchvision import transforms +from leakpro.tests.constants import STORAGE_PATH, get_image_handler_config from leakpro.utils.import_helper import Self -from leakpro.tests.constants import STORAGE_PATH, get_image_handler_config class ConvNet(Module): """Convolutional Neural Network model.""" @@ -128,10 +128,10 @@ def create_mock_image_dataset() -> str: def create_mock_model_and_metadata() -> str: """Creates a mock model and saves it to a file.""" parameters = get_image_handler_config() - + if not os.path.exists(parameters.target_folder): os.makedirs(parameters.target_folder) - + # Create a mock model model = ConvNet() model_path = parameters.target_folder + "/target_model.pkl" diff --git a/leakpro/tests/input_handler/tabular_input_handler.py b/leakpro/tests/input_handler/tabular_input_handler.py index 7caaa288..3853a925 100644 --- a/leakpro/tests/input_handler/tabular_input_handler.py +++ b/leakpro/tests/input_handler/tabular_input_handler.py @@ -9,6 +9,7 @@ from leakpro import AbstractInputHandler from leakpro.input_handler.abstract_input_handler import AbstractInputHandler + class TabularInputHandler(AbstractInputHandler): """Class to handle the user input for the CIFAR10 dataset.""" @@ -42,11 +43,11 @@ def train( criterion = self.get_criterion() optimizer = self.get_optimizer(model) - + for e in 
tqdm(range(epochs), desc="Training Progress"): model.train() train_acc, train_loss = 0.0, 0.0 - + for data, target in dataloader: target = target.float().unsqueeze(1) data, target = data.to(dev, non_blocking=True), target.to(dev, non_blocking=True) @@ -56,11 +57,11 @@ def train( loss = criterion(output, target) pred = sigmoid(output) >= 0.5 train_acc += pred.eq(target).sum().item() - + loss.backward() optimizer.step() train_loss += loss.item() - + train_acc = train_acc/len(dataloader.dataset) train_loss = train_loss/len(dataloader) diff --git a/leakpro/tests/input_handler/tabular_utils.py b/leakpro/tests/input_handler/tabular_utils.py index 862ae8ba..f820f4c7 100644 --- a/leakpro/tests/input_handler/tabular_utils.py +++ b/leakpro/tests/input_handler/tabular_utils.py @@ -1,19 +1,17 @@ import os import pickle +import random import numpy as np -import torch.nn.functional as F # noqa: N812 -import pandas as pd -import random from dotmap import DotMap from sklearn.preprocessing import OneHotEncoder - -from torch import from_numpy, tensor, save -from torch.nn import Module, Linear, ReLU +from torch import from_numpy, save, tensor +from torch.nn import Linear, Module, ReLU from torch.utils.data import TensorDataset from leakpro.tests.constants import STORAGE_PATH, get_tabular_handler_config + class MLP(Module): def __init__(self, input_size, hidden_size, num_classes): super(MLP, self).__init__() @@ -23,7 +21,7 @@ def __init__(self, input_size, hidden_size, num_classes): self.fc1 = Linear(input_size, hidden_size) self.relu = ReLU() self.fc2 = Linear(hidden_size, num_classes) - + def forward(self, x): out = self.fc1(x) out = self.relu(out) @@ -36,15 +34,15 @@ class DatasetWithSubset(TensorDataset): def __init__(self, x:tensor, y:tensor, dec_to_onehot:dict, one_hot_encoded:bool=True): self.x = x self.y = y - + # create dictionary to map categorical columns to number of classes self.dec_to_onehot = dec_to_onehot self.one_hot_encoded = one_hot_encoded - + def subset(self, 
indices): - return DatasetWithSubset(self.x[indices], - self.y[indices], - self.dec_to_onehot, + return DatasetWithSubset(self.x[indices], + self.y[indices], + self.dec_to_onehot, self.one_hot_encoded) def __len__(self): return len(self.y) @@ -88,7 +86,7 @@ def setup_tabular_test()->None: def create_mock_tabular_dataset() -> str: """Creates a mock tabular dataset with random images.""" parameters = get_tabular_handler_config() - + # Constants to create a mock tabular dataset n_points = parameters.data_points n_continuous = parameters.n_continuous @@ -97,31 +95,31 @@ def create_mock_tabular_dataset() -> str: dataset_name = "tabular_handler_dataset.pkl" continuous_data = np.random.randn(n_points, n_continuous) - + categorical_data = [] for _ in range(n_categorical): classes = np.random.randint(2, 10) categorical_data.append([random.choice(range(classes)) for _ in range(n_points)]) categorical_data = np.array(categorical_data).T # Transpose to align rows with n_points - + one_hot_encoder = OneHotEncoder(sparse_output=False) categorical_one_hot = one_hot_encoder.fit_transform(categorical_data) combined_data = np.hstack([continuous_data, categorical_one_hot]) - + dec_to_onehot = {} for i in range(n_continuous): dec_to_onehot[i] = [i] # Continuous features are identity-mapped - + n_cols = n_continuous for i in range(n_continuous, n_continuous + n_categorical): dec_to_onehot[i] = list(one_hot_encoder.categories_[i - n_continuous] + n_cols) n_cols += len(dec_to_onehot[i]) one_hot_encoded = True - + data = from_numpy(combined_data).float() label = from_numpy(np.random.randint(0, num_classes, n_points)).float() - + dataset = DatasetWithSubset(data, label, dec_to_onehot, one_hot_encoded) # Save the dataset to a .pkg file @@ -134,10 +132,10 @@ def create_mock_tabular_dataset() -> str: def create_mock_model_and_metadata(input_size:int) -> str: """Creates a mock model and saves it to a file.""" parameters = get_tabular_handler_config() - + if not 
os.path.exists(parameters.target_folder): os.makedirs(parameters.target_folder) - + # Create a mock model model = MLP(input_size=input_size, hidden_size=64, num_classes=parameters.num_classes) model_path = parameters.target_folder + "/target_model.pkl" diff --git a/leakpro/tests/input_handler/test_tabular_handler.py b/leakpro/tests/input_handler/test_tabular_handler.py index 5b15b213..29cb0f25 100644 --- a/leakpro/tests/input_handler/test_tabular_handler.py +++ b/leakpro/tests/input_handler/test_tabular_handler.py @@ -47,24 +47,24 @@ def test_abstract_handler_setup_tabular(tabular_handler:TabularInputHandler) -> assert len(labels) == parameters.data_points assert np.all(labels <= parameters.num_classes) assert np.all(labels >= 0) - + def test_tabular_extension_class(tabular_handler:TabularInputHandler) -> None: """Test the extension methods of the tabular handler.""" data, _ = next(iter(tabular_handler.get_dataloader(np.arange(10)))) - + assert data is not None - + if not tabular_handler.one_hot_encoded: data = tabular_handler.one_hot_encode(data) - + data2 = tabular_handler.one_hot_to_categorical(data) assert data2 is not None data3 = tabular_handler.one_hot_encode(data2) assert data3 is not None - + assert equal(data, data3) assert data2.shape[1] <= data.shape[1] - + def test_tabular_input_handler(tabular_handler:TabularInputHandler) -> None: """Test the CIFAR10 input handler.""" @@ -84,7 +84,7 @@ def test_tabular_input_handler(tabular_handler:TabularInputHandler) -> None: tabular_handler.get_criterion(), tabular_handler.get_optimizer(tabular_handler.target_model), parameters.epochs) - # move back to cpu + # move back to cpu after_weights = train_dict["model"].to("cpu").state_dict() weights_changed = [equal(before_weights[key], after_weights[key]) for key in before_weights] assert any(weights_changed) is False diff --git a/leakpro/tests/mia_attacks/attacks/test_lira.py b/leakpro/tests/mia_attacks/attacks/test_lira.py index 641eed0d..a66669e0 100644 --- 
a/leakpro/tests/mia_attacks/attacks/test_lira.py +++ b/leakpro/tests/mia_attacks/attacks/test_lira.py @@ -1,33 +1,34 @@ -from pytest import raises from math import isnan +from pytest import raises -from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler from leakpro.attacks.mia_attacks.lira import AttackLiRA +from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler +from leakpro.tests.constants import get_audit_config, get_shadow_model_config from leakpro.tests.input_handler.image_input_handler import ImageInputHandler -from leakpro.tests.constants import get_shadow_model_config, get_audit_config + def test_lira_setup(image_handler:ImageInputHandler) -> None: """Test the initialization of LiRA.""" audit_config = get_audit_config() lira_params = audit_config.attack_list.lira lira_obj = AttackLiRA(image_handler, lira_params) - + assert lira_obj is not None assert lira_obj.target_model is not None assert lira_obj.online == lira_params.online assert lira_obj.num_shadow_models == lira_params.num_shadow_models assert lira_obj.training_data_fraction == lira_params.training_data_fraction assert lira_obj.memorization == False - + lira_params.num_shadow_models = -1 with raises(ValueError) as excinfo: lira_obj = AttackLiRA(image_handler, lira_params) assert str(excinfo.value) == "num_shadow_models must be between 1 and None" - + lira_params.num_shadow_models = 3 - + description = lira_obj.description() assert len(description) == 4 @@ -35,48 +36,48 @@ def test_lira_prepare_online_attack(image_handler:ImageInputHandler) -> None: audit_config = get_audit_config() lira_params = audit_config.attack_list.lira lira_params.online = True - + image_handler.configs.shadow_model = get_shadow_model_config() lira_obj = AttackLiRA(image_handler, lira_params) - + if ShadowModelHandler.is_created() == False: ShadowModelHandler(image_handler) - + lira_obj.prepare_attack() - + # ensure correct number of shadow models are read assert len(lira_obj.shadow_models) 
== lira_params.num_shadow_models # ensure the attack data indices correspond to the correct pool assert sorted(lira_obj.attack_data_indices) == list(range(image_handler.population_size)) # memorization is tested in a different module assert lira_obj.memorization == False - + # Check that the filtering of the attack data is correct (this is done after shadow models are created) n_attack_points = len(lira_obj.in_members) + len(lira_obj.out_members) assert n_attack_points > 0 assert lira_obj.shadow_models_logits.shape == (n_attack_points, lira_params.num_shadow_models) assert lira_obj.target_logits.shape == (n_attack_points, ) - + def test_lira_prepare_offline_attack(image_handler:ImageInputHandler) -> None: audit_config = get_audit_config() lira_params = audit_config.attack_list.lira lira_params.online = False - + image_handler.configs.shadow_model = get_shadow_model_config() lira_obj = AttackLiRA(image_handler, lira_params) - + if ShadowModelHandler.is_created() == False: ShadowModelHandler(image_handler) - + lira_obj.prepare_attack() - + # ensure correct number of shadow models are read assert len(lira_obj.shadow_models) == lira_params.num_shadow_models # ensure the attack data indices correspond to the correct pool assert sorted(lira_obj.attack_data_indices) == sorted(set(range(image_handler.population_size)) - set(image_handler.test_indices) - set(image_handler.train_indices)) # memorization is tested in a different module assert lira_obj.memorization == False - + # Check that the filtering of the attack data is correct (this is done after shadow models are created) n_attack_points = len(lira_obj.in_members) + len(lira_obj.out_members) assert n_attack_points > 0 @@ -94,7 +95,7 @@ def test_lira_online_attack(image_handler:ImageInputHandler): if ShadowModelHandler.is_created() == False: ShadowModelHandler(image_handler) lira_obj.prepare_attack() - + # Test standard deviation calculation std_fixed = lira_obj.get_std(lira_obj.shadow_models_logits.flatten(), 
lira_obj.in_indices_masks.flatten(), @@ -107,14 +108,14 @@ def test_lira_online_attack(image_handler:ImageInputHandler): lira_obj.in_indices_masks.flatten(), True, "carlini") - + std_individual = lira_obj.get_std(lira_obj.shadow_models_logits.flatten(), lira_obj.in_indices_masks.flatten(), True, "individual_carlini") assert std_fixed == std_carlini assert std_fixed == std_individual - + # Test attack lira_obj.run_attack() assert lira_obj.fixed_in_std != lira_obj.fixed_out_std @@ -122,7 +123,7 @@ def test_lira_online_attack(image_handler:ImageInputHandler): assert len(lira_obj.in_member_signals)+len(lira_obj.out_member_signals) == n_attack_points assert any(isnan(x) for x in lira_obj.in_member_signals) == False assert any(isnan(x) for x in lira_obj.out_member_signals) == False - + def test_lira_online_attack(image_handler:ImageInputHandler): # Set up for testing audit_config = get_audit_config() @@ -134,11 +135,11 @@ def test_lira_online_attack(image_handler:ImageInputHandler): ShadowModelHandler(image_handler) lira_obj.prepare_attack() lira_obj.fix_var_threshold = 0.0 - + # Test attack lira_obj.run_attack() assert lira_obj.fixed_in_std != lira_obj.fixed_out_std n_attack_points = len(lira_obj.in_members) + len(lira_obj.out_members) assert len(lira_obj.in_member_signals)+len(lira_obj.out_member_signals) == n_attack_points assert any(isnan(x) for x in lira_obj.in_member_signals) == False - assert any(isnan(x) for x in lira_obj.out_member_signals) == False \ No newline at end of file + assert any(isnan(x) for x in lira_obj.out_member_signals) == False diff --git a/leakpro/tests/mia_attacks/utils/test_shadow_model_handler.py b/leakpro/tests/mia_attacks/utils/test_shadow_model_handler.py index 7f767689..07d4a528 100644 --- a/leakpro/tests/mia_attacks/utils/test_shadow_model_handler.py +++ b/leakpro/tests/mia_attacks/utils/test_shadow_model_handler.py @@ -1,20 +1,22 @@ """Test the shadow model handler module.""" import os -import numpy as np +import numpy as np from 
pytest import raises + from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler -from leakpro.tests.input_handler.image_input_handler import ImageInputHandler from leakpro.tests.constants import get_shadow_model_config +from leakpro.tests.input_handler.image_input_handler import ImageInputHandler + def test_shadow_model_handler_singleton(image_handler:ImageInputHandler) -> None: """Test that only one instance gets created.""" - + image_handler.configs.shadow_model = get_shadow_model_config() if ShadowModelHandler.is_created() == False: sm = ShadowModelHandler(image_handler) assert ShadowModelHandler.is_created() == True - + with raises(ValueError) as excinfo: ShadowModelHandler(image_handler) assert str(excinfo.value) == "Singleton already created with specific parameters." @@ -26,12 +28,12 @@ def test_shadow_model_handler_creation_from_target(image_handler:ImageInputHandl if ShadowModelHandler.is_created() == True: ShadowModelHandler.delete_instance() sm = ShadowModelHandler(image_handler) - + assert sm.batch_size == image_handler.target_model_metadata["batch_size"] assert sm.epochs == image_handler.target_model_metadata["epochs"] assert sm.init_params == image_handler.target_model_metadata["init_params"] assert sm.model_blueprint == image_handler.target_model.__class__ - + image_handler.target_model_metadata["optimizer"].pop("name") assert sm.optimizer_config == image_handler.target_model_metadata["optimizer"] image_handler.target_model_metadata["loss"].pop("name") @@ -39,34 +41,34 @@ def test_shadow_model_handler_creation_from_target(image_handler:ImageInputHandl def test_shadow_model_creation_and_loading(image_handler:ImageInputHandler) -> None: image_handler.configs.shadow_model = get_shadow_model_config() - + # Test initialization if ShadowModelHandler.is_created() == True: ShadowModelHandler.delete_instance() sm = ShadowModelHandler(image_handler) - + assert sm.batch_size == image_handler.configs.shadow_model.batch_size assert sm.epochs == 
image_handler.configs.shadow_model.epochs assert sm.init_params == {} assert sm.model_blueprint is not None assert sm.optimizer_config is not None assert sm.loss_config is not None - + # Test creation n_models = 1 training_fraction = 0.5 online = False - + entries_start = os.listdir(sm.storage_path) n_entries_start = len(entries_start) - + indx = sm.create_shadow_models(n_models, image_handler.test_indices, training_fraction, online)[0] entries = os.listdir(sm.storage_path) n_entries_phase1 = len(entries) assert n_entries_phase1 - n_entries_start == 2*n_models assert f"metadata_{indx}.pkl" in entries assert f"shadow_model_{indx}.pkl" in entries - + indx2 = sm.create_shadow_models(n_models, image_handler.test_indices, training_fraction, ~online)[0] entries = os.listdir(sm.storage_path) @@ -75,25 +77,25 @@ def test_shadow_model_creation_and_loading(image_handler:ImageInputHandler) -> N assert f"shadow_model_{indx}.pkl" in entries assert f"metadata_{indx2}.pkl" in entries assert f"shadow_model_{indx2}.pkl" in entries - + # Test loading meta_0 = sm._load_metadata(sm.storage_path + f"/metadata_{indx}.pkl") meta_1 = sm._load_metadata(sm.storage_path + f"/metadata_{indx2}.pkl") assert meta_0["online"] == online assert meta_1["online"] == ~online assert meta_1["num_train"] == meta_0["num_train"] - + shadow_model_indices = [indx, indx2] models, indices = sm.get_shadow_models(shadow_model_indices) for model in models: assert model.model_obj.__class__.__name__ == "ConvNet" - + # Test index mask # check what test data is included in the training data of the shadow models mask = sm.get_in_indices_mask(shadow_model_indices, np.array(image_handler.test_indices)) - + true_mask_0 = np.array([True if item in meta_0["train_indices"] else False for item in image_handler.test_indices]) assert np.array_equal(mask[:, 0], true_mask_0) - + true_mask_1 = np.array([True if item in meta_1["train_indices"] else False for item in image_handler.test_indices]) assert np.array_equal(mask[:, 1], 
true_mask_1) diff --git a/tests/test_attack_result.py/__init__.py b/leakpro/tests/test_attack_result/__init__.py similarity index 100% rename from tests/test_attack_result.py/__init__.py rename to leakpro/tests/test_attack_result/__init__.py diff --git a/tests/test_attack_result.py/test_attack_result.py b/leakpro/tests/test_attack_result/test_attack_result.py similarity index 100% rename from tests/test_attack_result.py/test_attack_result.py rename to leakpro/tests/test_attack_result/test_attack_result.py diff --git a/tests/test_report_handler/__init__.py b/leakpro/tests/test_report_handler/__init__.py similarity index 100% rename from tests/test_report_handler/__init__.py rename to leakpro/tests/test_report_handler/__init__.py diff --git a/tests/test_report_handler/test_report_handler.py b/leakpro/tests/test_report_handler/test_report_handler.py similarity index 100% rename from tests/test_report_handler/test_report_handler.py rename to leakpro/tests/test_report_handler/test_report_handler.py From 133220882565abc585cd7456cade50c51ab537c1 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Mon, 28 Oct 2024 12:24:50 +0000 Subject: [PATCH 04/14] updated raport handler --- leakpro/metrics/attack_result.py | 11 ++--- leakpro/reporting/report_handler.py | 63 ++++++++++++++++++++--------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index dfa8ad90..3ae62af1 100644 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -303,6 +303,7 @@ def save(self:Self, path: str, name: str, config:dict = None): threshold = self.threshold ) + @classmethod def get_strongest(self, results) -> list: """Method for selecting the strongest attack.""" return max((res for res in results), key=lambda d: d.roc_auc) @@ -377,6 +378,7 @@ def create_plot(self, results, filename = "", save_name = "") -> None: plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches="tight") plt.clf() + 
@classmethod def create_results( self: Self, results: list, @@ -496,6 +498,7 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: pass + @classmethod def create_result(self: Self, attack_name: str, save_path: str) -> None: """Result method for GIA.""" @@ -617,13 +620,7 @@ def save(self:Self, path: str, name: str, config:dict = None): with open(f"{path}/{name}/{self.id}/data.json", "w") as f: json.dump(data, f) - def create_result(self, results): - """Method for results.""" - def _latex(results): - """Latex method for TEMPLATEResult""" - pass - pass - + @classmethod def create_result(self, results): """Method for results.""" def _latex(results): diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index 340d57f3..5744fd75 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -80,13 +80,22 @@ def create_results_all(self) -> None: results = [res for res in self.results if res.resulttype == result_type] # If no results of type "result_type" is found, skip to next result_type - if len(results) == 0: + if not results: self.logger.info(f"No results of type {result_type} found.") continue - # Create all results - merged_result = results[0].create_results(results=results, save_dir=self.report_dir, save_name="all_results") - self.pdf_results[result_type].append(merged_result) + # Check if the result type has a 'create_results' method + try: + result_class = globals().get(result_type) + except: + self.logger.info(f"No {result_type} class could be found or exists") + continue + + if hasattr(result_class, 'create_results') and callable(getattr(result_class, 'create_results')): + + # Create all results + merged_result = result_class.create_results(results=results, save_dir=self.report_dir, save_name="all_results") + self.pdf_results[result_type].append(merged_result) except Exception as e: print("all", e) @@ -98,19 +107,26 @@ def create_results_strong(self): results = [res for res in 
self.results if res.resulttype == result_type] # If no results of type "result_type" is found, skip to next result_type - if len(results) == 0: + if not results: self.logger.info(f"No 'strong' results of type {result_type} found.") continue + try: + result_class = globals().get(result_type) + except: + self.logger.info(f"No {result_type} class could be found or exists") + continue + # Get all attack names attack_name_grouped_results = [self._get_results_of_name(results, name) for name in self._get_all_attacknames()] # Get the strongest result for each attack name - strongest_results = [result[0].get_strongest(result) for result in attack_name_grouped_results] + if hasattr(result_class, 'get_strongest') and callable(getattr(result_class, 'get_strongest')): + strongest_results = [result_class.get_strongest(result) for result in attack_name_grouped_results] - # Create the strongest results - merged_result = results[0].create_results(results=strongest_results, save_dir=self.report_dir, save_name="strong_results") - self.pdf_results[result_type].append(merged_result) + # Create the strongest results + merged_result = result_class.create_results(results=strongest_results, save_dir=self.report_dir, save_name="strong_results") + self.pdf_results[result_type].append(merged_result) except Exception as e: print("results_strong", e) @@ -125,22 +141,29 @@ def create_results_attackname_grouped(self): results = [res for res in self.results if res.resulttype == result_type] # If no results of type "result_type" is found, skip to next result_type - if len(results) == 0: - self.logger.info(f"No results of type {result_type} found.") + if not results: + self.logger.info(f"No results of type {result_type} to group.") + continue + + # Check if the result type has a 'create_results' method + try: + result_class = globals().get(result_type) + except: + self.logger.info(f"No {result_type} class could be found or exists") continue for name in all_attack_names: + if hasattr(result_class, 
'create_results') and callable(getattr(result_class, 'create_results')): + try: + # Get result for each attack names + attack_results = self._get_results_of_name(results, name) - try: - # Get result for each attack names - attack_results = self._get_results_of_name(results, name) - - # Create results - merged_result = attack_results[0].create_results(results=attack_results, save_dir=self.report_dir, save_name="grouped_"+name) - self.pdf_results[result_type].append(merged_result) + # Create results + merged_result = result_class.create_results(results=attack_results, save_dir=self.report_dir, save_name="grouped_"+name) + self.pdf_results[result_type].append(merged_result) - except Exception as e: - print("create_results_attackname_grouped", e) + except Exception as e: + print("create_results_attackname_grouped", e) def create_report(self): """Method to create PDF report""" From c416dbbb58bf961cd300924fcab628c7e8cb4873 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Tue, 29 Oct 2024 13:47:09 +0000 Subject: [PATCH 05/14] update report_handler and tests --- leakpro/reporting/report_handler.py | 40 +++++----- leakpro/tests/test_attack_result/__init__.py | 2 +- .../test_attack_result/test_attack_result.py | 79 ++++++++++--------- leakpro/tests/test_report_handler/__init__.py | 2 +- .../test_report_handler.py | 45 ++++++----- 5 files changed, 87 insertions(+), 81 deletions(-) diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index 5744fd75..ae6fcc67 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -3,12 +3,14 @@ import os import subprocess +from leakpro.utils.import_helper import Self + # Report Handler class ReportHandler(): """Implementation of the report handler.""" - def __init__(self, report_dir: str, logger:logging.Logger) -> None: + def __init__(self:Self, report_dir: str, logger:logging.Logger) -> None: self.logger = logger self.report_dir = report_dir self.pdf_results = {} @@ -21,13 
+23,13 @@ def __init__(self, report_dir: str, logger:logging.Logger) -> None: for key in self.leakpro_types: self.pdf_results[key] = [] - def save_results(self, attack_name: str, result_data: dict, config: dict) -> None: + def save_results(self:Self, attack_name: str, result_data: dict, config: dict) -> None: """Save attack results.""" self.logger.info(f"Saving results for {attack_name}") result_data.save(self.report_dir, attack_name, config) - def load_results(self): + def load_results(self:Self): self.results = [] for parentdir in os.scandir(f"{self.report_dir}"): if parentdir.is_dir(): @@ -62,18 +64,18 @@ def load_results(self): except Exception as e: self.logger.info(f"Not able to load data, Error: {e}") - def _get_results_of_name(self, results, resultname_value) -> list: + def _get_results_of_name(self:Self, results, resultname_value) -> list: indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] return [results[idx] for idx in indices] - def _get_all_attacknames(self): + def _get_all_attacknames(self:Self): attack_name_list = [] for result in self.results: if result.resultname not in attack_name_list: attack_name_list.append(result.resultname) return attack_name_list - def create_results_all(self) -> None: + def create_results_all(self:Self) -> None: for result_type in self.leakpro_types: try: # Get all results of type "Result" @@ -87,11 +89,11 @@ def create_results_all(self) -> None: # Check if the result type has a 'create_results' method try: result_class = globals().get(result_type) - except: + except: self.logger.info(f"No {result_type} class could be found or exists") continue - - if hasattr(result_class, 'create_results') and callable(getattr(result_class, 'create_results')): + + if hasattr(result_class, "create_results") and callable(result_class.create_results): # Create all results merged_result = result_class.create_results(results=results, save_dir=self.report_dir, save_name="all_results") @@ -100,7 
+102,7 @@ def create_results_all(self) -> None: except Exception as e: print("all", e) - def create_results_strong(self): + def create_results_strong(self:Self): for result_type in self.leakpro_types: try: # Get all results of type "Result" @@ -113,7 +115,7 @@ def create_results_strong(self): try: result_class = globals().get(result_type) - except: + except: self.logger.info(f"No {result_type} class could be found or exists") continue @@ -121,7 +123,7 @@ def create_results_strong(self): attack_name_grouped_results = [self._get_results_of_name(results, name) for name in self._get_all_attacknames()] # Get the strongest result for each attack name - if hasattr(result_class, 'get_strongest') and callable(getattr(result_class, 'get_strongest')): + if hasattr(result_class, "get_strongest") and callable(result_class.get_strongest): strongest_results = [result_class.get_strongest(result) for result in attack_name_grouped_results] # Create the strongest results @@ -131,7 +133,7 @@ def create_results_strong(self): except Exception as e: print("results_strong", e) - def create_results_attackname_grouped(self): + def create_results_attackname_grouped(self:Self): # Get all attack names all_attack_names = self._get_all_attacknames() @@ -144,16 +146,16 @@ def create_results_attackname_grouped(self): if not results: self.logger.info(f"No results of type {result_type} to group.") continue - + # Check if the result type has a 'create_results' method try: result_class = globals().get(result_type) - except: + except: self.logger.info(f"No {result_type} class could be found or exists") continue for name in all_attack_names: - if hasattr(result_class, 'create_results') and callable(getattr(result_class, 'create_results')): + if hasattr(result_class, "create_results") and callable(result_class.create_results): try: # Get result for each attack names attack_results = self._get_results_of_name(results, name) @@ -165,7 +167,7 @@ def create_results_attackname_grouped(self): except Exception 
as e: print("create_results_attackname_grouped", e) - def create_report(self): + def create_report(self:Self): """Method to create PDF report""" # Create initial part of the document. @@ -181,7 +183,7 @@ def create_report(self): # Compile the PDF self._compile_pdf() - def _init_pdf(self,): + def _init_pdf(self:Self): self.latex_content = """ \\documentclass{article} \\usepackage{tabularx} @@ -190,7 +192,7 @@ def _init_pdf(self,): \\begin{document} """ - def _compile_pdf(self, install_flag: bool = False): + def _compile_pdf(self:Self, install_flag: bool = False): """Method to compile PDF.""" self.latex_content += """ diff --git a/leakpro/tests/test_attack_result/__init__.py b/leakpro/tests/test_attack_result/__init__.py index c4bfdc70..bba53df7 100644 --- a/leakpro/tests/test_attack_result/__init__.py +++ b/leakpro/tests/test_attack_result/__init__.py @@ -1 +1 @@ -"""Init file for attack result tests""" \ No newline at end of file +"""Init file for attack result tests.""" diff --git a/leakpro/tests/test_attack_result/test_attack_result.py b/leakpro/tests/test_attack_result/test_attack_result.py index dab9ce27..56d3f2ae 100644 --- a/leakpro/tests/test_attack_result/test_attack_result.py +++ b/leakpro/tests/test_attack_result/test_attack_result.py @@ -1,15 +1,16 @@ -import unittest -import os import json -import logging -import subprocess +import os import tempfile -from unittest.mock import MagicMock, patch, mock_open, call +import unittest +from unittest.mock import MagicMock + from leakpro.metrics.attack_result import * +from leakpro.utils.import_helper import Self + class TestMIAResult(unittest.TestCase): - def setUp(self) -> None: + def setUp(self:Self) -> None: """Set up temporary directory and logger for MIAResult.""" self.temp_dir = tempfile.TemporaryDirectory() @@ -30,7 +31,7 @@ def setUp(self) -> None: [ 0.46973035], [-0.1584589 ], [ 0.14289466]]) - + predictions_proba = None threshold = None @@ -39,53 +40,53 @@ def setUp(self) -> None: id = None 
self.miaresult = MIAResult(predicted_labels = predicted_labels, - true_labels = true_labels, + true_labels = true_labels, signal_values = signal_values, predictions_proba = predictions_proba, threshold = threshold, audit_indices = audit_indices, resultname = resultname, id = id) - - - - self.config = {'random_seed': 1234, 'attack_list': - {'lira': - {'training_data_fraction': 0.5, - 'num_shadow_models': 3, - 'online': True} + + + + self.config = {"random_seed": 1234, "attack_list": + {"lira": + {"training_data_fraction": 0.5, + "num_shadow_models": 3, + "online": True} }, - 'report_log': - './leakpro_output/results', - 'config_log': - './leakpro_output/config', - 'target_model_folder': - './target', - 'attack_folder': - 'attack_objects', - 'attack_type': - 'mia', - 'split_method': - 'no_overlapping' + "report_log": + "./leakpro_output/results", + "config_log": + "./leakpro_output/config", + "target_model_folder": + "./target", + "attack_folder": + "attack_objects", + "attack_type": + "mia", + "split_method": + "no_overlapping" } - def tearDown(self) -> None: + def tearDown(self:Self) -> None: """Clean up temporary directory.""" self.temp_dir.cleanup() - def test_MIAResult_init(self) -> None: + def test_MIAResult_init(self:Self) -> None: """Test the initialization of MIAResult.""" assert self.miaresult.id == None - def test_check_tpr_fpr(self): + def test_check_tpr_fpr(self:Self) -> None: assert np.allclose(self.miaresult.tpr, np.array([0., 0., 0.16666667, 0.5, 1., 1., 1., 1., 1., 1.])) assert self.miaresult.fp.all() == 0. assert self.miaresult.tn.all() == 0. 
- def test_save_load_MIAResult(self) -> None: - + def test_save_load_MIAResult(self:Self) -> None: + name = "lira" - config_name = get_config_name(self.config['attack_list'][name]) + config_name = get_config_name(self.config["attack_list"][name]) save_path = f"{self.temp_dir}/{name}/{name}{config_name}" # Test saving @@ -99,16 +100,16 @@ def test_save_load_MIAResult(self) -> None: # Test loading with open(f"{save_path}/data.json") as f: data = json.load(f) - + self.miaresult_new = MIAResult(load=True) assert self.miaresult_new.predicted_labels == None assert self.miaresult_new.true_labels == None assert self.miaresult_new.signal_values == None - + self.miaresult_new.load(data) assert np.allclose(self.miaresult_new.tpr, np.array([0., 0., 0.16666667, 0.5, 1., 1., 1., 1., 1., 1.])) - def test_get_strongest_MIAResult(self) -> None: + def test_get_strongest_MIAResult(self:Self) -> None: """Test selecting the strongest attack based on ROC AUC.""" result_1 = MagicMock(roc_auc=0.75) result_2 = MagicMock(roc_auc=0.85) @@ -120,7 +121,7 @@ def test_get_strongest_MIAResult(self) -> None: # The strongest attack should be the one with the highest ROC AUC assert strongest == result_2 - def test_latex(self): + def test_latex(self:Self) -> None: """Test if the LaTeX content is generated correctly.""" result = [MagicMock(id="attack-config-1", resultname="test_attack_1", fixed_fpr_table={"TPR@1.0%FPR": 0.90, "TPR@0.1%FPR": 0.80, "TPR@0.01%FPR": 0.70, "TPR@0.0%FPR": 0.60})] @@ -146,4 +147,4 @@ def test_latex(self): self.assertIn("0.6", latex_content) # Ensure the LaTeX content ends properly - self.assertIn("\\newline\n", latex_content) \ No newline at end of file + self.assertIn("\\newline\n", latex_content) diff --git a/leakpro/tests/test_report_handler/__init__.py b/leakpro/tests/test_report_handler/__init__.py index 796d15a0..7018163f 100644 --- a/leakpro/tests/test_report_handler/__init__.py +++ b/leakpro/tests/test_report_handler/__init__.py @@ -1 +1 @@ -"""Init file for report 
handler tests""" \ No newline at end of file +"""Init file for report handler tests.""" diff --git a/leakpro/tests/test_report_handler/test_report_handler.py b/leakpro/tests/test_report_handler/test_report_handler.py index 9ed6b952..c0775fca 100644 --- a/leakpro/tests/test_report_handler/test_report_handler.py +++ b/leakpro/tests/test_report_handler/test_report_handler.py @@ -1,27 +1,28 @@ -import unittest -import os -import json import logging -import subprocess +import os import tempfile -from unittest.mock import MagicMock, patch, mock_open, call -from leakpro.reporting.report_handler import ReportHandler +from unittest.mock import MagicMock + from leakpro.metrics.attack_result import * +from leakpro.reporting.report_handler import ReportHandler +from leakpro.utils.import_helper import Self + -class TestReportHandler(unittest.TestCase): +class TestReportHandler(): + """Test class of the ReportHandler.""" - def setUp(self) -> None: + def setUp(self:Self) -> None: """Set up temporary directory and logger for ReportHandler.""" self.temp_dir = tempfile.TemporaryDirectory() - self.logger = logging.getLogger('test_logger') + self.logger = logging.getLogger("test_logger") self.logger.setLevel(logging.INFO) self.report_handler = ReportHandler(report_dir=self.temp_dir.name, logger=self.logger) - def tearDown(self) -> None: + def tearDown(self:Self) -> None: """Clean up temporary directory.""" self.temp_dir.cleanup() - def test_report_handler_initialization(self) -> None: + def test_report_handler_initialization(self:Self) -> None: """Test the initialization of ReportHandler.""" assert self.report_handler is not None assert self.report_handler.report_dir == self.temp_dir.name @@ -29,27 +30,30 @@ def test_report_handler_initialization(self) -> None: types = ["MIAResult", "GIAResults", "SyntheticResult"] assert False not in [_type in types for _type in self.report_handler.leakpro_types] - assert True not in [True if self.report_handler.pdf_results[key] else False for key in 
self.report_handler.leakpro_types] + assert True not in [bool(self.report_handler.pdf_results[key]) for key in self.report_handler.leakpro_types] assert False not in [_type in globals() for _type in types] - def test_init_pdf(self) -> None: - assert hasattr(self.report_handler, 'latex_content') == False + def test_init_pdf(self:Self) -> None: + """Test the initialization method of the ReportHandler.""" + + if hasattr(self.report_handler, "latex_content"): + raise AssertionError self.report_handler._init_pdf() assert "documentclass" in self.report_handler.latex_content assert "begin" in self.report_handler.latex_content - def test_compile_pdf(self) -> None: + def test_compile_pdf(self:Self) -> None: """Test PDF compilation.""" self.report_handler._init_pdf() self.report_handler._compile_pdf(install_flag=True) assert "end" in self.report_handler.latex_content - assert os.path.isfile(f'{self.report_handler.report_dir}/LeakPro_output.tex') - assert os.path.isfile(f'./LeakPro_output.pdf') + assert os.path.isfile(f"{self.report_handler.report_dir}/LeakPro_output.tex") + assert os.path.isfile("./LeakPro_output.pdf") - def test_get_all_attacknames(self) -> None: + def test_get_all_attacknames(self:Self) -> None: """Test retrieval of all attack names.""" result_mock_1 = MagicMock(resultname="Attack1") result_mock_2 = MagicMock(resultname="Attack2") @@ -59,8 +63,7 @@ def test_get_all_attacknames(self) -> None: assert attack_names == ["Attack1", "Attack2"] - def test_get_results_of_name(self): - + def test_get_results_of_name(self:Self) -> None: """Test retrieval of all attack names.""" result_mock_1 = MagicMock(resultname="Attack1") result_mock_2 = MagicMock(resultname="Attack2") @@ -74,4 +77,4 @@ def test_get_results_of_name(self): assert len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack1")) == 1 assert len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack2")) == 2 - assert 
len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack3")) == 3 \ No newline at end of file + assert len(self.report_handler._get_results_of_name(self.report_handler.results, "Attack3")) == 3 From 7df0ddcb8eabadea0944bab8906e221c8f1125ac Mon Sep 17 00:00:00 2001 From: henrikfo Date: Tue, 5 Nov 2024 11:34:50 +0000 Subject: [PATCH 06/14] check fixed --- leakpro/attacks/mia_attacks/lira.py | 16 +- leakpro/metrics/attack_result.py | 149 +++++++++--------- leakpro/reporting/report_handler.py | 93 ++++++----- .../test_attack_result/test_attack_result.py | 46 +++--- .../test_report_handler.py | 3 +- pyproject.toml | 2 + 6 files changed, 169 insertions(+), 140 deletions(-) diff --git a/leakpro/attacks/mia_attacks/lira.py b/leakpro/attacks/mia_attacks/lira.py index de1edd7c..c366302e 100755 --- a/leakpro/attacks/mia_attacks/lira.py +++ b/leakpro/attacks/mia_attacks/lira.py @@ -7,7 +7,7 @@ from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA from leakpro.attacks.utils.boosting import Memorization from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler -from leakpro.import_helper import Self +from leakpro.input_handler.abstract_input_handler import AbstractInputHandler from leakpro.metrics.attack_result import CombinedMetricResult, MIAResult from leakpro.signals.signal import ModelRescaledLogits from leakpro.utils.import_helper import Self @@ -49,6 +49,7 @@ def _configure_attack(self:Self, configs: dict) -> None: self.training_data_fraction = configs.get("training_data_fraction", 0.5) self.include_train_data = configs.get("include_train_data", self.online) self.include_test_data = configs.get("include_test_data", self.online) + self.eval_batch_size = configs.get("eval_batch_size", 32) # Memorization config # Activate memorization @@ -70,6 +71,7 @@ def _configure_attack(self:Self, configs: dict) -> None: validation_dict = { "num_shadow_models": (self.num_shadow_models, 1, None), "training_data_fraction": 
(self.training_data_fraction, 0, 1), + "eval_batch_size": (self.eval_batch_size, 1, 1_000_000), "memorization_threshold": (self.memorization_threshold, 0, 1), "min_num_memorization_audit_points": (self.min_num_memorization_audit_points, 1, 1_000_000), "num_memorization_audit_points": (self.num_memorization_audit_points, 0, 1_000_000), @@ -149,7 +151,6 @@ def prepare_attack(self:Self)->None: self.audit_dataset["data"] = self.audit_dataset["data"] self.in_members = self.audit_dataset["in_members"] self.out_members = self.audit_dataset["out_members"] - # mask = [True if indice in self.in_members else False for indice in self.audit_dataset["data"]] # Check offline attack for possible IN- sample(s) if not self.online: @@ -158,15 +159,14 @@ def prepare_attack(self:Self)->None: logger.info(f"Some shadow model(s) contains {count_in_samples} IN samples in total for the model(s)") logger.info("This is not an offline attack!") - self.batch_size = 20000 #int(len(self.audit_dataset["data"])/2) self.logger.info(f"Calculating the logits for all {self.num_shadow_models} shadow models") - self.shadow_models_logits = np.swapaxes(self.signal(self.shadow_models, self.handler, self.audit_dataset["data"],\ - self.batch_size), 0, 1) + self.shadow_models_logits = np.swapaxes(self.signal(self.shadow_models, self.handler, self.audit_dataset["data"], + self.eval_batch_size), 0, 1) # Calculate logits for the target model self.logger.info("Calculating the logits for the target model") - self.target_logits = np.swapaxes(self.signal([self.target_model], self.handler, self.audit_dataset["data"], self.batch_size),\ - 0, 1).squeeze() + self.target_logits = np.swapaxes(self.signal([self.target_model], self.handler, self.audit_dataset["data"], + self.eval_batch_size), 0, 1).squeeze() # Using Memorizationg boosting if self.memorization: @@ -190,7 +190,7 @@ def prepare_attack(self:Self)->None: org_audit_data_length, self.handler, self.online, - self.batch_size, + self.eval_batch_size, ) 
memorization_mask, _, _ = memorization.run() diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index 3ae62af1..ac1ae0b0 100755 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -132,7 +132,7 @@ def __init__( # noqa: PLR0913 self.roc_auc = auc(self.fpr, self.tpr) - def _get_primitives(self:Self): + def _get_primitives(self:Self) -> dict: """Return the primitives of the CombinedMetricResult class.""" return {"predicted_labels": self.predicted_labels.tolist(), "true_labels": self.true_labels.tolist(), @@ -141,7 +141,7 @@ def _get_primitives(self:Self): "threshold": self.threshold.tolist() if isinstance(self.threshold, np.ndarray) else None, } - def save(self:Self, path: str, name: str, config:dict): + def save(self:Self, path: str, name: str, config:dict) -> None: """Save the CombinedMetricResult class to disk.""" # Primitives are the set of variables to re-create the class from scratch @@ -205,6 +205,11 @@ def __init__( # noqa: PLR0913 predictions_proba: Continuous version of the predicted_labels. signal_values: Values of the signal used by the metric. threshold: Threshold computed by the metric. 
+ audit_indices: The connesponding dataset indices for the results + id: The identity of the attack + load: If the data should be loaded + metadata: Metadata about the results + resultname: The name of the attack and result """ @@ -234,7 +239,9 @@ def __init__( # noqa: PLR0913 self.tpr = self.tp / (self.tp + self.fn) self.roc_auc = auc(self.fpr, self.tpr) - def load(self, data): + def load(self:Self, data: dict) -> None: + """Load the MIAResults to disk.""" + self.resultname = data["resultname"] self.resulttype = data["resulttype"] self.tpr = data["tpr"] @@ -247,11 +254,9 @@ def load(self, data): self.true_labels = data["true_labels"] self.threshold = data["threshold"] - def save(self:Self, path: str, name: str, config:dict = None): + def save(self:Self, path: str, name: str, config:dict = None) -> None: """Save the MIAResults to disk.""" - print(config) - result_config = config["attack_list"][name] fixed_fpr_table = get_result_fixed_fpr(self.fpr, self.tpr) @@ -303,16 +308,16 @@ def save(self:Self, path: str, name: str, config:dict = None): threshold = self.threshold ) - @classmethod - def get_strongest(self, results) -> list: + @staticmethod + def get_strongest(results: list) -> list: """Method for selecting the strongest attack.""" return max((res for res in results), key=lambda d: d.roc_auc) - def create_signal_histogram(self, filename, signal_values, true_labels, threshold) -> None: + def create_signal_histogram(self:Self, filename: str, signal_values: list, true_labels: list, threshold: float) -> None: + """Method to create Signal Histogram.""" values = np.array(signal_values).ravel() labels = np.array(true_labels).ravel() - threshold = threshold data = pd.DataFrame( { @@ -350,7 +355,11 @@ def create_signal_histogram(self, filename, signal_values, true_labels, threshol plt.savefig(fname=filename, dpi=1000) plt.clf() - def create_plot(self, results, filename = "", save_name = "") -> None: + @staticmethod + def create_plot(results: list, save_dir: str = "", 
save_name: str = "") -> None: + """Plot method for MIAResult.""" + + filename = f"{save_dir}/{save_name}" # Create plot for results reduced_labels = reduce_to_unique_labels(results) @@ -378,26 +387,27 @@ def create_plot(self, results, filename = "", save_name = "") -> None: plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches="tight") plt.clf() - @classmethod + @staticmethod def create_results( - self: Self, results: list, save_dir: str = "./", save_name: str = "foo", - ): + ) -> str: + """Result method for MIAResult.""" - filename = f"{save_dir}/{save_name}" - - self.create_plot(results, filename, save_name) + MIAResult.create_plot(results, save_dir, save_name) - return self._latex(results, save_name, filename) + return MIAResult._latex(results, save_dir, save_name) - def _latex(self, results, subsection, filename): + @staticmethod + def _latex(results: list, save_dir: str, save_name: str) -> str: """Latex method for MIAResult.""" + filename = f"{save_dir}/{save_name}" + latex_content = "" latex_content += f""" - \\subsection{{{" ".join(subsection.split("_"))}}} + \\subsection{{{" ".join(save_name.split("_"))}}} \\begin{{figure}}[ht] \\includegraphics[width=0.8\\textwidth]{{{filename}.png}} \\end{{figure}} @@ -407,18 +417,19 @@ def _latex(self, results, subsection, filename): \\resizebox{\\linewidth}{!}{% \\begin{tabularx}{\\textwidth}{l c l l l l} Attack name & attack config & TPR: 1.0\\%FPR & 0.1\\%FPR & 0.01\\%FPR & 0.0\\%FPR \\\\ - \\hline + \\hline """ - def config_latex_style(config): + def config_latex_style(config: str) -> str: config = " \\\\ ".join(config.split("-")[1:]) config = "-".join(config.split("_")) return f"""\\shortstack{{{config}}}""" for res in results: config = config_latex_style(res.id) - latex_content += f"""{"-".join(res.resultname.split("_"))} & {config} & {res.fixed_fpr_table["TPR@1.0%FPR"]} & {res.fixed_fpr_table["TPR@0.1%FPR"]} & {res.fixed_fpr_table["TPR@0.01%FPR"]} & {res.fixed_fpr_table["TPR@0.0%FPR"]} \\\\ \\hline - """ + 
latex_content += f"""{"-".join(res.resultname.split("_"))} & {config} & {res.fixed_fpr_table["TPR@1.0%FPR"]} & + {res.fixed_fpr_table["TPR@0.1%FPR"]} & {res.fixed_fpr_table["TPR@0.01%FPR"]} & + {res.fixed_fpr_table["TPR@0.0%FPR"]} \\\\ \\hline""" latex_content += """ \\end{tabularx} } @@ -442,13 +453,15 @@ def __init__(self: Self, original_data: DataLoader, recreated_data: DataLoader, if load: return - def load(self, data): + def load(self:Self, data: dict) -> None: + """Load the GIAResults from disk.""" + self.original = data["original"] self.resulttype = data["resulttype"] self.recreated = data["recreated"] self.id = data["id"] - def save(self: Self, save_path: str, name: str, config: dict): + def save(self: Self, save_path: str, name: str, config: dict) -> None: """Save the GIAResults to disk.""" result_config = config["attack_list"][name] @@ -496,14 +509,13 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: with open(f"{save_path}/data.json", "w") as f: json.dump(data, f) - pass - - @classmethod - def create_result(self: Self, attack_name: str, save_path: str) -> None: + @staticmethod + def create_result(attack_name: str, save_path: str) -> None: """Result method for GIA.""" - def _latex(attack_name, original, recreated): - latex_content = f""" + def _latex(attack_name: str, original: str, recreated: str) -> str: + """Latex method for GIAResults.""" + return f""" \\subsection{{{" ".join(attack_name.split("_"))}}} \\begin{{figure}}[ht] \\includegraphics[width=0.8\\textwidth]{{{original}}} @@ -515,8 +527,6 @@ def _latex(attack_name, original, recreated): \\caption{{Original}} \\end{{figure}} """ - return latex_content - return _latex(attack_name=attack_name, original=save_path+"recreated_image.png", recreated=save_path+"original_image.png") class SyntheticResult: @@ -524,30 +534,26 @@ class SyntheticResult: def __init__( # noqa: PLR0913 self:Self, + values: list, load: bool = False, - )-> None: - """Initalze Result method - - Args: - ---- + ) -> 
None: + """Initalze Result method.""" - """ # Initialize values to result object - # self.values = values + self.values = values # Have a method to return if the results are to be loaded if load: return # Create some result - # self.result_values = some_result + self.result_values = self.create_result(self.values) - def load(self, data: dict): + def load(self:Self, data: dict) -> None: """Load the TEMPLATEResult class to disk.""" - # self.result_values = data["some_result"] - pass + self.result_values = data["some_result"] - def save(self:Self, path: str, name: str, config:dict = None): + def save(self:Self, path: str, name: str, config:dict = None) -> None: """Save the TEMPLATEResult class to disk.""" result_config = config["attack_list"][name] @@ -575,30 +581,26 @@ class TEMPLATEResult: def __init__( # noqa: PLR0913 self:Self, + values: list, load: bool = False, - )-> None: - """Initalze Result method - - Args: - ---- + ) -> None: + """Initalze Result method.""" - """ # Initialize values to result object - # self.values = values + self.values = values # Have a method to return if the results are to be loaded if load: return # Create some result - # self.result_values = some_result + self.result_values = self.create_result(self.values) - def load(self, data: dict): + def load(self:Self, data: dict) -> None: """Load the TEMPLATEResult class to disk.""" - # self.result_values = data["some_result"] - pass + self.result_values = data["some_result"] - def save(self:Self, path: str, name: str, config:dict = None): + def save(self:Self, path: str, name: str, config:dict = None) -> None: """Save the TEMPLATEResult class to disk.""" result_config = config["attack_list"][name] @@ -620,18 +622,19 @@ def save(self:Self, path: str, name: str, config:dict = None): with open(f"{path}/{name}/{self.id}/data.json", "w") as f: json.dump(data, f) - @classmethod - def create_result(self, results): + @staticmethod + def create_result(results: list) -> str: """Method for results.""" - 
def _latex(results): - """Latex method for TEMPLATEResult""" - pass - pass - -def get_result_fixed_fpr(fpr, tpr): + def _latex(results: list) -> str: + """Latex method for TEMPLATEResult.""" + return results + return _latex(results) +def get_result_fixed_fpr(fpr: list, tpr: list) -> dict: + """Find TPR values for fixed TPRs.""" # Function to find TPR at given FPR thresholds - def find_tpr_at_fpr(fpr_array:np.ndarray, tpr_array:np.ndarray, threshold:float): #-> Optional[str]: + def find_tpr_at_fpr(fpr_array:np.ndarray, tpr_array:np.ndarray, threshold:float) -> float: + """Find tpr for a given fpr.""" try: # Find the last index where FPR is less than the threshold valid_index = np.where(fpr_array < threshold)[0][-1] @@ -646,7 +649,8 @@ def find_tpr_at_fpr(fpr_array:np.ndarray, tpr_array:np.ndarray, threshold:float) "TPR@0.01%FPR": find_tpr_at_fpr(fpr, tpr, 0.0001), "TPR@0.0%FPR": find_tpr_at_fpr(fpr, tpr, 0.0)} -def get_config_name(config): +def get_config_name(config: dict) -> str: + """Create id from the attack config.""" config = dict(sorted(config.items())) exclude = ["attack_data_dir"] @@ -661,7 +665,7 @@ def get_config_name(config): config_name += f"-{key}={value}" return config_name -def reduce_to_unique_labels(results): +def reduce_to_unique_labels(results: list) -> list: """Reduce very long labels to unique and distinct ones.""" strings = [res.id for res in results] @@ -675,8 +679,8 @@ def reduce_to_unique_labels(results): config = "-".join(parts[1:]) if len(parts) > 1 else "" # The rest is the configuration name_configs[name].append(config) # Store the configuration under the name - def find_common_suffix(configs): - """Helper function to find the common suffix among multiple configurations""" + def find_common_suffix(configs: list) -> str: + """Helper function to find the common suffix among multiple configurations.""" if not configs: return "" @@ -702,7 +706,8 @@ def find_common_suffix(configs): common_suffix = find_common_suffix(configs) # Remove the 
common suffix from each configuration - trimmed_configs = [config[:-(len(common_suffix) + 1)] if common_suffix and config.endswith(common_suffix) else config for config in configs] + trimmed_configs = [config[:-(len(common_suffix) + 1)] if common_suffix and config.endswith(common_suffix) + else config for config in configs] # Process configurations based on whether they share the same pattern for config in trimmed_configs: diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index ae6fcc67..9dd4f18f 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -1,3 +1,5 @@ +"""Implementation of the Report module.""" + import json import logging import os @@ -24,12 +26,14 @@ def __init__(self:Self, report_dir: str, logger:logging.Logger) -> None: self.pdf_results[key] = [] def save_results(self:Self, attack_name: str, result_data: dict, config: dict) -> None: - """Save attack results.""" + """Save method for results.""" self.logger.info(f"Saving results for {attack_name}") result_data.save(self.report_dir, attack_name, config) - def load_results(self:Self): + def load_results(self:Self) -> None: + """Load method for results.""" + self.results = [] for parentdir in os.scandir(f"{self.report_dir}"): if parentdir.is_dir(): @@ -64,11 +68,11 @@ def load_results(self:Self): except Exception as e: self.logger.info(f"Not able to load data, Error: {e}") - def _get_results_of_name(self:Self, results, resultname_value) -> list: + def _get_results_of_name(self:Self, results: list, resultname_value: str) -> list: indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] return [results[idx] for idx in indices] - def _get_all_attacknames(self:Self): + def _get_all_attacknames(self:Self) -> list: attack_name_list = [] for result in self.results: if result.resultname not in attack_name_list: @@ -76,6 +80,8 @@ def _get_all_attacknames(self:Self): return attack_name_list def 
create_results_all(self:Self) -> None: + """Result method to group all attacks.""" + for result_type in self.leakpro_types: try: # Get all results of type "Result" @@ -89,20 +95,24 @@ def create_results_all(self:Self) -> None: # Check if the result type has a 'create_results' method try: result_class = globals().get(result_type) - except: - self.logger.info(f"No {result_type} class could be found or exists") + except Exception as e: + self.logger.info(f"No {result_type} class could be found or exists. Error: {e}") continue if hasattr(result_class, "create_results") and callable(result_class.create_results): # Create all results - merged_result = result_class.create_results(results=results, save_dir=self.report_dir, save_name="all_results") + merged_result = result_class.create_results(results=results, + save_dir=self.report_dir, + save_name="all_results") self.pdf_results[result_type].append(merged_result) except Exception as e: - print("all", e) + self.logger.info(f"Error in results all: {e}") + + def create_results_strong(self:Self) -> None: + """Result method for grouping the strongest attacks.""" - def create_results_strong(self:Self): for result_type in self.leakpro_types: try: # Get all results of type "Result" @@ -115,25 +125,31 @@ def create_results_strong(self:Self): try: result_class = globals().get(result_type) - except: - self.logger.info(f"No {result_type} class could be found or exists") + except Exception as e: + self.logger.info(f"No {result_type} class could be found or exists. 
Error: {e}") continue # Get all attack names - attack_name_grouped_results = [self._get_results_of_name(results, name) for name in self._get_all_attacknames()] + attack_name_grouped_results = [self._get_results_of_name(results, name) for\ + name in self._get_all_attacknames()] # Get the strongest result for each attack name if hasattr(result_class, "get_strongest") and callable(result_class.get_strongest): - strongest_results = [result_class.get_strongest(result) for result in attack_name_grouped_results] + strongest_results = [result_class.get_strongest(result) for result in \ + attack_name_grouped_results] # Create the strongest results - merged_result = result_class.create_results(results=strongest_results, save_dir=self.report_dir, save_name="strong_results") + merged_result = result_class.create_results(results=strongest_results, + save_dir=self.report_dir, + save_name="strong_results") self.pdf_results[result_type].append(merged_result) except Exception as e: - print("results_strong", e) + self.logger.info(f"Error in results strong: {e}") + + def create_results_attackname_grouped(self:Self) -> None: + """Result method for grouping attacks by name.""" - def create_results_attackname_grouped(self:Self): # Get all attack names all_attack_names = self._get_all_attacknames() @@ -150,8 +166,8 @@ def create_results_attackname_grouped(self:Self): # Check if the result type has a 'create_results' method try: result_class = globals().get(result_type) - except: - self.logger.info(f"No {result_type} class could be found or exists") + except Exception as e: + self.logger.info(f"No {result_type} class could be found or exists. 
Error: {e}") continue for name in all_attack_names: @@ -161,14 +177,16 @@ def create_results_attackname_grouped(self:Self): attack_results = self._get_results_of_name(results, name) # Create results - merged_result = result_class.create_results(results=attack_results, save_dir=self.report_dir, save_name="grouped_"+name) + merged_result = result_class.create_results(results=attack_results, + save_dir=self.report_dir, + save_name="grouped_"+name) self.pdf_results[result_type].append(merged_result) except Exception as e: - print("create_results_attackname_grouped", e) + self.logger.info(f"Error in results grouped: {e}") - def create_report(self:Self): - """Method to create PDF report""" + def create_report(self:Self) -> None: + """Method to create PDF report.""" # Create initial part of the document. self._init_pdf() @@ -183,16 +201,16 @@ def create_report(self:Self): # Compile the PDF self._compile_pdf() - def _init_pdf(self:Self): + def _init_pdf(self:Self) -> None: self.latex_content = """ \\documentclass{article} \\usepackage{tabularx} \\usepackage{graphicx} - \\usepackage{graphics} + \\usepackage{graphics} \\begin{document} """ - def _compile_pdf(self:Self, install_flag: bool = False): + def _compile_pdf(self:Self) -> None: """Method to compile PDF.""" self.latex_content += """ @@ -201,24 +219,17 @@ def _compile_pdf(self:Self, install_flag: bool = False): with open(f"{self.report_dir}/LeakPro_output.tex", "w") as f: f.write(self.latex_content) - # Check if pdflatex is installed - try: - check = subprocess.check_output(["which", "pdflatex"], universal_newlines=True) - assert "pdflatex" in check - except: - # Option to install pdflatex - self.logger.info('Could not find pdflatex installed\nPlease install pdflatex with "apt install texlive-latex-base"') - choice = input("Do you want to install pdflatex? 
(Y/n): ").lower() - if (choice in {"y", "yes"} or install_flag==True): - proc = subprocess.Popen(["apt", "install", "-y", "texlive-latex-base"], stdout=subprocess.DEVNULL) - proc.communicate() - - # Compile PDF if possible try: + # Check if pdflatex is installed + check = subprocess.check_output(["which", "pdflatex"], universal_newlines=True) # noqa: S607 S603 + if "pdflatex" not in check: + self.logger.info("Could not find pdflatex installed\ + \nPlease install pdflatex with apt install texlive-latex-base") + cmd = ["pdflatex", "-interaction", "nonstopmode", f"{self.report_dir}/LeakPro_output.tex"] - proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL) + proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL) # noqa: S603 proc.communicate() self.logger.info("PDF compiled") + except Exception as e: - print(e) - self.logger.info("Could not compile PDF") + self.logger.info(f"Could not compile PDF: {e}") diff --git a/leakpro/tests/test_attack_result/test_attack_result.py b/leakpro/tests/test_attack_result/test_attack_result.py index 56d3f2ae..58390cbb 100644 --- a/leakpro/tests/test_attack_result/test_attack_result.py +++ b/leakpro/tests/test_attack_result/test_attack_result.py @@ -1,14 +1,19 @@ +"""Tests for the attack_result module.""" + import json import os import tempfile import unittest from unittest.mock import MagicMock -from leakpro.metrics.attack_result import * +import numpy as np + +from leakpro.metrics.attack_result import MIAResult, get_config_name from leakpro.utils.import_helper import Self class TestMIAResult(unittest.TestCase): + """Test class for MIAResult.""" def setUp(self:Self) -> None: """Set up temporary directory and logger for MIAResult.""" @@ -74,16 +79,19 @@ def tearDown(self:Self) -> None: """Clean up temporary directory.""" self.temp_dir.cleanup() - def test_MIAResult_init(self:Self) -> None: + def test_miaresult_init(self:Self) -> None: """Test the initialization of MIAResult.""" - assert self.miaresult.id == None + assert 
self.miaresult.id is None def test_check_tpr_fpr(self:Self) -> None: + """Test fpr and tpr.""" + assert np.allclose(self.miaresult.tpr, np.array([0., 0., 0.16666667, 0.5, 1., 1., 1., 1., 1., 1.])) assert self.miaresult.fp.all() == 0. assert self.miaresult.tn.all() == 0. - def test_save_load_MIAResult(self:Self) -> None: + def test_save_load_miaresult(self:Self) -> None: + """Test load and save functionality.""" name = "lira" config_name = get_config_name(self.config["attack_list"][name]) @@ -102,14 +110,14 @@ def test_save_load_MIAResult(self:Self) -> None: data = json.load(f) self.miaresult_new = MIAResult(load=True) - assert self.miaresult_new.predicted_labels == None - assert self.miaresult_new.true_labels == None - assert self.miaresult_new.signal_values == None + assert self.miaresult_new.predicted_labels is None + assert self.miaresult_new.true_labels is None + assert self.miaresult_new.signal_values is None self.miaresult_new.load(data) assert np.allclose(self.miaresult_new.tpr, np.array([0., 0., 0.16666667, 0.5, 1., 1., 1., 1., 1., 1.])) - def test_get_strongest_MIAResult(self:Self) -> None: + def test_get_strongest_miaresult(self:Self) -> None: """Test selecting the strongest attack based on ROC AUC.""" result_1 = MagicMock(roc_auc=0.75) result_2 = MagicMock(roc_auc=0.85) @@ -124,27 +132,29 @@ def test_get_strongest_MIAResult(self:Self) -> None: def test_latex(self:Self) -> None: """Test if the LaTeX content is generated correctly.""" - result = [MagicMock(id="attack-config-1", resultname="test_attack_1", fixed_fpr_table={"TPR@1.0%FPR": 0.90, "TPR@0.1%FPR": 0.80, "TPR@0.01%FPR": 0.70, "TPR@0.0%FPR": 0.60})] + result = [MagicMock(id="attack-config-1", resultname="test_attack_1",\ + fixed_fpr_table={"TPR@1.0%FPR": 0.90, "TPR@0.1%FPR": 0.80, "TPR@0.01%FPR": 0.70, "TPR@0.0%FPR": 0.60})] + subsection = "attack_comparison" filename = f"{self.temp_dir}/test.png" latex_content = MIAResult(load=True)._latex(result, subsection, filename) # Check that the subsection 
is correctly included - self.assertIn("\\subsection{attack comparison}", latex_content) + assert "\\subsection{attack comparison}" in latex_content # Check that the figure is correctly included - self.assertIn(f"\\includegraphics[width=0.8\\textwidth]{{{filename}.png}}", latex_content) + assert f"\\includegraphics[width=0.8\\textwidth]{{{filename}.png}}" in latex_content # Check that the table header is correct - self.assertIn("Attack name & attack config & TPR: 1.0\\%FPR & 0.1\\%FPR & 0.01\\%FPR & 0.0\\%FPR", latex_content) + assert "Attack name & attack config & TPR: 1.0\\%FPR & 0.1\\%FPR & 0.01\\%FPR & 0.0\\%FPR" in latex_content # Check if the results for mock_result are included correctly - self.assertIn("test-attack-1", latex_content) - self.assertIn("0.9", latex_content) - self.assertIn("0.8", latex_content) - self.assertIn("0.7", latex_content) - self.assertIn("0.6", latex_content) + assert "test-attack-1" in latex_content + assert "0.9" in latex_content + assert "0.8" in latex_content + assert "0.7" in latex_content + assert "0.6" in latex_content # Ensure the LaTeX content ends properly - self.assertIn("\\newline\n", latex_content) + assert "\\newline\n" in latex_content diff --git a/leakpro/tests/test_report_handler/test_report_handler.py b/leakpro/tests/test_report_handler/test_report_handler.py index c0775fca..28f273f8 100644 --- a/leakpro/tests/test_report_handler/test_report_handler.py +++ b/leakpro/tests/test_report_handler/test_report_handler.py @@ -1,9 +1,10 @@ +"""Tests for the report_handler module.""" + import logging import os import tempfile from unittest.mock import MagicMock -from leakpro.metrics.attack_result import * from leakpro.reporting.report_handler import ReportHandler from leakpro.utils.import_helper import Self diff --git a/pyproject.toml b/pyproject.toml index edffb97b..f679d7ef 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,8 @@ lint.select = [ exclude = [ ".venv", "./tests", + "./leakpro/tests", + "./examples", ] 
lint.ignore = [ From b15b8a749b85e9bbfb9ae55c64f6415131d58837 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Wed, 20 Nov 2024 00:37:18 +0000 Subject: [PATCH 07/14] Added notebook example for report handler --- leakpro/metrics/attack_result.py | 567 +++++++++++++++--- leakpro/reporting/report_handler.py | 220 +++---- leakpro/synthetic_data_attacks/plots.py | 16 +- .../singling_out_utils.py | 107 +++- 4 files changed, 694 insertions(+), 216 deletions(-) diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index ac1ae0b0..5bb72131 100755 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -239,22 +239,30 @@ def __init__( # noqa: PLR0913 self.tpr = self.tp / (self.tp + self.fn) self.roc_auc = auc(self.fpr, self.tpr) - def load(self:Self, data: dict) -> None: + + @staticmethod + def load(data: dict) -> None: """Load the MIAResults to disk.""" - self.resultname = data["resultname"] - self.resulttype = data["resulttype"] - self.tpr = data["tpr"] - self.fpr = data["fpr"] - self.roc_auc = data["roc_auc"] - self.config = data["config"] - self.fixed_fpr_table = data["fixed_fpr"] - self.audit_indices = data["audit_indices"] - self.signal_values = data["signal_values"] - self.true_labels = data["true_labels"] - self.threshold = data["threshold"] - - def save(self:Self, path: str, name: str, config:dict = None) -> None: + miaresult = MIAResult(load=True) + + miaresult.resultname = data["resultname"] + miaresult.resulttype = data["resulttype"] + miaresult.tpr = data["tpr"] + miaresult.fpr = data["fpr"] + miaresult.roc_auc = data["roc_auc"] + miaresult.config = data["config"] + miaresult.fixed_fpr_table = data["fixed_fpr"] + miaresult.audit_indices = data["audit_indices"] + miaresult.signal_values = data["signal_values"] + miaresult.true_labels = data["true_labels"] + miaresult.threshold = data["threshold"] + + miaresult.id = data["id"] + + return miaresult + + def save(self:Self, path: str, name: str, config:dict = None, 
show_plot:bool = False) -> None: """Save the MIAResults to disk.""" result_config = config["attack_list"][name] @@ -297,7 +305,8 @@ def save(self:Self, path: str, name: str, config:dict = None) -> None: temp_res.fpr = self.fpr temp_res.id = self.id self.create_plot(results = [temp_res], - filename = filename + filename = filename, + show_plot = show_plot ) # Create SignalHistogram plot for MIAResult @@ -305,7 +314,8 @@ def save(self:Self, path: str, name: str, config:dict = None) -> None: self.create_signal_histogram(filename = filename, signal_values = self.signal_values, true_labels = self.true_labels, - threshold = self.threshold + threshold = self.threshold, + show_plot = show_plot, ) @staticmethod @@ -313,7 +323,13 @@ def get_strongest(results: list) -> list: """Method for selecting the strongest attack.""" return max((res for res in results), key=lambda d: d.roc_auc) - def create_signal_histogram(self:Self, filename: str, signal_values: list, true_labels: list, threshold: float) -> None: + def create_signal_histogram( + self:Self, filename: str, + signal_values: list, + true_labels: list, + threshold: float, + show_plot: bool = False, + ) -> None: """Method to create Signal Histogram.""" values = np.array(signal_values).ravel() @@ -353,10 +369,18 @@ def create_signal_histogram(self:Self, filename: str, signal_values: list, true_ plt.ylabel("Number of samples") plt.title("Signal histogram") plt.savefig(fname=filename, dpi=1000) - plt.clf() + if show_plot: + plt.show() + else: + plt.clf() @staticmethod - def create_plot(results: list, save_dir: str = "", save_name: str = "") -> None: + def create_plot( + results: list, + save_dir: str = "", + save_name: str = "", + show_plot: bool = False + ) -> None: """Plot method for MIAResult.""" filename = f"{save_dir}/{save_name}" @@ -385,34 +409,79 @@ def create_plot(results: list, save_dir: str = "", save_name: str = "") -> None: plt.ylabel("True positive rate (TPR)") plt.title(save_name+"ROC Curve") 
plt.savefig(fname=f"{filename}.png", dpi=1000, bbox_inches="tight") - plt.clf() + + if show_plot: + plt.show() + else: + plt.clf() + + @staticmethod + def _get_all_attacknames( + results: list + ) -> list: + attack_name_list = [] + for result in results: + if result.resultname not in attack_name_list: + attack_name_list.append(result.resultname) + return attack_name_list + + @staticmethod + def _get_results_of_name( + results: list, + resultname_value: str + ) -> list: + indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] + return [results[idx] for idx in indices] @staticmethod def create_results( results: list, save_dir: str = "./", save_name: str = "foo", + show_plot: bool = False, ) -> str: """Result method for MIAResult.""" + latex = "" + + # Create plot for all results + MIAResult.create_plot(results, save_dir, save_name="all_results", show_plot=show_plot) + latex += MIAResult._latex(results, save_dir, save_name="all_results") + + # Create plot for results grouped by name + all_attack_names = MIAResult._get_all_attacknames(results) + for name in all_attack_names: + results_name_grouped = MIAResult._get_results_of_name(results, name) + MIAResult.create_plot(results_name_grouped, save_dir, save_name=name, show_plot=show_plot) + latex += MIAResult._latex(results_name_grouped, save_dir, save_name=name) - MIAResult.create_plot(results, save_dir, save_name) + # Create plot for results grouped by name + grouped_results = [MIAResult._get_results_of_name(results, name) for name + in all_attack_names] + strongest_results = [MIAResult.get_strongest(result) for result in grouped_results] + MIAResult.create_plot(strongest_results, save_dir, save_name="strongest", show_plot=show_plot) + latex += MIAResult._latex(strongest_results, save_dir, save_name="strongest") - return MIAResult._latex(results, save_dir, save_name) + return latex @staticmethod - def _latex(results: list, save_dir: str, save_name: str) -> str: + def _latex( + 
results: list, + save_dir: str, + save_name: str + ) -> str: """Latex method for MIAResult.""" filename = f"{save_dir}/{save_name}" - latex_content = "" - latex_content += f""" + # Input mia results image + latex_content = f""" \\subsection{{{" ".join(save_name.split("_"))}}} \\begin{{figure}}[ht] \\includegraphics[width=0.8\\textwidth]{{{filename}.png}} \\end{{figure}} """ + # Initialize latex table latex_content += """ \\resizebox{\\linewidth}{!}{% \\begin{tabularx}{\\textwidth}{l c l l l l} @@ -420,11 +489,13 @@ def _latex(results: list, save_dir: str, save_name: str) -> str: \\hline """ + # Convert config to latex table input def config_latex_style(config: str) -> str: config = " \\\\ ".join(config.split("-")[1:]) config = "-".join(config.split("_")) return f"""\\shortstack{{{config}}}""" + # Append all mia results to table for res in results: config = config_latex_style(res.id) latex_content += f"""{"-".join(res.resultname.split("_"))} & {config} & {res.fixed_fpr_table["TPR@1.0%FPR"]} & @@ -442,8 +513,16 @@ def config_latex_style(config: str) -> str: class GIAResults: """Contains results for a GIA attack.""" - def __init__(self: Self, original_data: DataLoader, recreated_data: DataLoader, - psnr_score: float, data_mean: float, data_std: float, load: bool) -> None: + def __init__( + self: Self, + original_data: DataLoader, + recreated_data: DataLoader, + psnr_score: float, + data_mean: float, + data_std: float, + load: bool + ) -> None: + self.original_data = original_data self.recreated_data = recreated_data self.PSNR_score = psnr_score @@ -453,7 +532,10 @@ def __init__(self: Self, original_data: DataLoader, recreated_data: DataLoader, if load: return - def load(self:Self, data: dict) -> None: + def load( + self:Self, + data: dict + ) -> None: """Load the GIAResults from disk.""" self.original = data["original"] @@ -461,7 +543,13 @@ def load(self:Self, data: dict) -> None: self.recreated = data["recreated"] self.id = data["id"] - def save(self: Self, 
save_path: str, name: str, config: dict) -> None: + def save( + self: Self, + save_path: str, + name: str, + config: dict, + show_plot: bool = False + ) -> None: """Save the GIAResults to disk.""" result_config = config["attack_list"][name] @@ -493,6 +581,15 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: original = os.path.join(save_path, "original_image.png") save_image(gt_denormalized, original) + if show_plot: + # Plot output + plt.plot(output_denormalized) + plt.show() + + # Plot ground truth + plt.plot(gt_denormalized) + plt.show() + # Data to be saved data = { "resulttype": self.__class__.__name__, @@ -510,13 +607,21 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: json.dump(data, f) @staticmethod - def create_result(attack_name: str, save_path: str) -> None: + def create_results( + results: list, + save_dir: str = "./", + save_name: str = "foo", + ) -> str: """Result method for GIA.""" - def _latex(attack_name: str, original: str, recreated: str) -> str: + def _latex( + save_name: str, + original: str, + recreated: str + ) -> str: """Latex method for GIAResults.""" return f""" - \\subsection{{{" ".join(attack_name.split("_"))}}} + \\subsection{{{" ".join(save_name.split("_"))}}} \\begin{{figure}}[ht] \\includegraphics[width=0.8\\textwidth]{{{original}}} \\caption{{Original}} @@ -527,63 +632,347 @@ def _latex(attack_name: str, original: str, recreated: str) -> str: \\caption{{Original}} \\end{{figure}} """ - return _latex(attack_name=attack_name, original=save_path+"recreated_image.png", recreated=save_path+"original_image.png") - -class SyntheticResult: - """Contains results related to the performance of the metric. 
It contains the results for multiple fpr.""" - - def __init__( # noqa: PLR0913 - self:Self, - values: list, - load: bool = False, - ) -> None: - """Initalze Result method.""" + return _latex(save_name=save_name, original=save_dir+"recreated_image.png", recreated=save_dir+"original_image.png") + +# class SyntheticResult: +# """Contains results for SyntheticResult.""" + +# def __init__( # noqa: PLR0913 +# self:Self, +# SynRes: Union[SinglingOutResults, LinkabilityResults, InferenceResults], +# load: bool = False, +# ) -> None: +# """Initalze SyntheticResult method.""" + +# # Initialize values to result object +# self.SynRes = SynRes + +# # Have a method to return if the results are to be loaded +# if load: +# return + +# # Create some result +# self.result_values = self.create_result(self.values) + +# def load( +# self:Self, +# data: dict +# ) -> None: +# """Load the SyntheticResult class to disk.""" +# self.result_values = data["some_result"] + +# def save( +# self:Self, +# save_path: str, +# save_name: str, +# config:dict = None +# ) -> None: +# """Save the SyntheticResult class to disk.""" + +# result_config = config["attack_list"][name] + +# # Get the name for the attack configuration +# config_name = get_config_name(result_config) +# self.id = f"{name}{config_name}" +# save_path = f"{path}/{name}/{self.id}" + +# save_name = os.path.join(save_path, f"synthetic.png") + +# SyntheticResult.plot( +# res=self.SynRes, +# show=False, +# save=True, +# save_path=save_path, +# save_name=save_name, +# ) + +# # Data to be saved +# data = { +# "synthetic_result_name": self.SynRes.__class__.__name__, +# "synthetic_result": self.SynRes, +# "image_path": save_name, +# "id": self.id +# } + +# # Check if path exists, otherwise create it. 
+# if not os.path.exists(f"{save_path}"): +# os.makedirs(f"{save_path}") + +# # Save the results to a file +# with open(f"{save_path}/data.json", "w") as f: +# json.dump(data, f) + +# @staticmethod +# def create_results( +# results: list, +# save_dir: str = "./", +# save_name: str = "foo", +# ) -> str: +# """Result method for SyntheticResult.""" + +# def _latex(save_name: str, result_file: str) -> str: +# """Latex method for SyntheticResult.""" +# return f""" +# \\subsection{{{" ".join(save_name.split("_"))}}} +# \\begin{{figure}}[ht] +# \\includegraphics[width=0.8\\textwidth]{{{result_file}}} +# \\caption{{Original}} +# \\end{{figure}} +# """ +# return _latex(results=results, save_name=save_name, result_file=save_dir+"synthetic.png") + +# @staticmethod +# def plot( +# res: Union[SinglingOutResults, LinkabilityResults, InferenceResults], +# high_res_flag: bool = True, +# case_flag: str = "base", +# show:bool = True, +# save:bool = False, +# save_path:str = "./", +# save_name:str = "fig.png", +# ) -> None: + +# save_name = os.path.join(save_path, save_name) + +# SyntheticResultName = res.__class__.__name__ +# if SyntheticResultName == "SinglingOutResults": +# SyntheticResult.plot_singling_out(sin_out_res=res, +# high_res_flag = high_res_flag, +# show=show, +# save=save, +# save_name=save_name) + +# elif SyntheticResultName == "LinkabilityResults": +# SyntheticResult.plot_linkability(link_res=res, +# high_res_flag = high_res_flag, +# show=show, +# save=save, +# save_name=save_name) + +# elif SyntheticResultName == "InferenceResults": +# if case_flag == "base": +# SyntheticResult.plot_ir_base_case(inf_res=res, +# high_res_flag = high_res_flag, +# show=show, +# save=save, +# save_name=save_name) +# elif case_flag == "worst": +# SyntheticResult.plot_ir_worst_case(inf_res=res, +# high_res_flag = high_res_flag, +# show=show, +# save=save, +# save_name=save_name) +# else: +# print("No such case") + +# def plot_ir_base_case( +# *, +# inf_res: InferenceResults, +# 
high_res_flag: bool = True, +# show: bool = True, +# save: bool = False, +# save_name: str = None, +# ) -> None: +# """Function to plot inference results base case given results. + +# Note: function is not tested and is used in examples. +# """ +# #Set res, secrets, set_secrets and set_nr_aux_cols +# res = np.array(inf_res.res) +# secrets = np.array(inf_res.secrets) +# set_secrets = sorted(set(secrets)) +# set_nr_aux_cols = np.unique(res[:,-1].astype(int)) +# # High res flag +# if high_res_flag: +# plot_save_high_res() +# # Set up the figure and get axes +# fig_title = f"Inference risk, base case scenario, {conf_level} confidence, total attacks: {int(res[:,0].sum())}" +# axs = get_figure_axes(two_axes_flag=True, fig_title=fig_title) +# # Set plot variables +# titles = ["Risk per column", "Risk per Nr aux cols"] +# xlabels = ["Secret col", "Nr aux cols"] +# sets_values = [set_secrets, set_nr_aux_cols] +# valueses = [secrets, res[:,-1]] +# assert len(axs) == len(titles) +# assert len(axs) == len(xlabels) +# assert len(axs) == len(sets_values) +# assert len(axs) == len(valueses) +# #Plotting +# for ax, title, xlabel, set_values, values in zip(axs, titles, xlabels, sets_values, valueses): +# set_labels_and_title( +# ax = ax, +# xlabel = xlabel, +# ylabel = "Risk", +# title = title +# ) +# # Iterate through values and plot bar charts +# iterate_values_plot_bar_charts(ax=ax, res=res, set_values=set_values, values=values) +# # Adding ticks +# set_ticks(ax=ax, xlabels=set_values) +# # Adding legend +# set_legend(ax=ax) +# # Save plot +# if save: +# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") +# # Show plot +# if show: +# plt.show() +# else: +# plt.clf() + +# def plot_ir_worst_case( +# *, +# inf_res: InferenceResults, +# high_res_flag: bool = True, +# show: bool = True, +# save: bool = False, +# save_name: str = None, +# ) -> None: +# """Function to plot inference results worst case given results. 
+ +# Note: function is not tested and is used in examples. +# """ +# #Set res, secrets and set_secrets +# res = np.array(inf_res.res) +# secrets = np.array(inf_res.secrets) +# set_secrets = sorted(set(secrets)) +# # High res flag +# if high_res_flag: +# plot_save_high_res() +# # Set up the figure and get axes +# ax = get_figure_axes() +# # Iterate through secrets and plot bar charts +# iterate_values_plot_bar_charts( +# ax = ax, +# res = res, +# set_values = set_secrets, +# values = secrets, +# max_value_flag = True +# ) +# # Adding labels and title +# set_labels_and_title( +# ax = ax, +# xlabel = "Secret col", +# ylabel = "Risk", +# title = f"Inference risk, worst case scenario, total attacks: {int(res[:,0].sum())}" +# ) +# # Adding ticks +# set_ticks(ax=ax, xlabels=set_secrets) +# # Adding legend +# set_legend(ax=ax) +# # Save plot +# if save: +# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") +# # Show plot +# if show: +# plt.show() +# else: +# plt.clf() + +# @staticmethod +# def plot_linkability( +# *, +# link_res:LinkabilityResults, +# high_res_flag: bool = False, +# show: bool = True, +# save: bool = False, +# save_name: str = None, +# ) -> None: +# """Function to plot linkability results from given res. + +# Note: function is not tested and is used in examples. 
+# """ +# # Get res and aux_cols_nr +# res = np.array(link_res.res) +# set_nr_aux_cols = np.unique(res[:,-1].astype(int)) +# # High res flag +# if high_res_flag: +# plot_save_high_res() +# # Set up the figure and get axes +# ax = get_figure_axes() +# # Iterate through nr of columns and plot bar charts +# iterate_values_plot_bar_charts(ax=ax, res=res, set_values=set_nr_aux_cols, values=res[:, -1]) +# # Adding labels and title +# set_labels_and_title( +# ax = ax, +# xlabel = "Nr aux cols", +# ylabel = "Risk", +# title = f"Linkability risk {conf_level} confidence, total attacks: {int(res[:,0].sum())}" +# ) +# # Adding ticks +# set_ticks(ax=ax, xlabels=set_nr_aux_cols) +# # Adding legend +# set_legend(ax=ax) +# # Save plot +# if save: +# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") +# # Show plot +# if show: +# plt.show() +# else: +# plt.clf() + +# @staticmethod +# def plot_singling_out( +# *, +# sin_out_res: SinglingOutResults, +# high_res_flag: bool = True, +# show: bool = True, +# save: bool = False, +# save_name: str = None, +# ) -> None: +# """Function to plot singling out given results. + +# Note: function is not tested and is used in examples. 
+# """ +# #Set res, n_cols and set_n_cols +# res = np.array(sin_out_res.res) +# n_cols = res[:,-1].astype(int).tolist() +# set_n_cols = np.unique(n_cols) +# # High res flag +# if high_res_flag: +# plot_save_high_res() +# # Set up the figure and get axes +# ax = get_figure_axes() +# # Iterate through values and plot bar charts +# iterate_values_plot_bar_charts( +# ax = ax, +# res = res, +# set_values = set_n_cols, +# values = n_cols, +# max_value_flag = True +# ) +# # Adding labels and title +# fig_title = f"Singling out risk total attacks: {int(res[:,0].sum())}" +# if res.shape[0]==1: +# fig_title += f", n_cols={int(res[0,-1])}" +# set_labels_and_title( +# ax = ax, +# xlabel = "n_cols for predicates", +# ylabel = "Risk", +# title = fig_title +# ) +# # Adding ticks +# set_ticks(ax=ax, xlabels=set_n_cols) +# # Adding legend +# if save: +# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") +# # Show plot +# if show: +# plt.show() +# else: +# plt.clf() - # Initialize values to result object - self.values = values - - # Have a method to return if the results are to be loaded - if load: - return - - # Create some result - self.result_values = self.create_result(self.values) - - def load(self:Self, data: dict) -> None: - """Load the TEMPLATEResult class to disk.""" - self.result_values = data["some_result"] - - def save(self:Self, path: str, name: str, config:dict = None) -> None: - """Save the TEMPLATEResult class to disk.""" - - result_config = config["attack_list"][name] - - # Data to be saved - data = { - "some_result": self.result_values - } - - # Get the name for the attack configuration - config_name = get_config_name(result_config) - self.id = f"{name}{config_name}" - save_path = f"{path}/{name}/{self.id}" - - # Check if path exists, otherwise create it. 
- if not os.path.exists(f"{save_path}"): - os.makedirs(f"{save_path}") - # Save the results to a file - with open(f"{save_path}/data.json", "w") as f: - json.dump(data, f) class TEMPLATEResult: """Contains results related to the performance of the metric. It contains the results for multiple fpr.""" def __init__( # noqa: PLR0913 - self:Self, - values: list, - load: bool = False, - ) -> None: + self:Self, + values: list, + load: bool = False, + ) -> None: """Initalze Result method.""" # Initialize values to result object @@ -596,11 +985,19 @@ def __init__( # noqa: PLR0913 # Create some result self.result_values = self.create_result(self.values) - def load(self:Self, data: dict) -> None: + def load( + self:Self, + data: dict + ) -> None: """Load the TEMPLATEResult class to disk.""" self.result_values = data["some_result"] - def save(self:Self, path: str, name: str, config:dict = None) -> None: + def save( + self:Self, + path: str, + name: str, + config:dict = None + ) -> None: """Save the TEMPLATEResult class to disk.""" result_config = config["attack_list"][name] @@ -623,7 +1020,7 @@ def save(self:Self, path: str, name: str, config:dict = None) -> None: json.dump(data, f) @staticmethod - def create_result(results: list) -> str: + def create_results(results: list) -> str: """Method for results.""" def _latex(results: list) -> str: """Latex method for TEMPLATEResult.""" diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index 9dd4f18f..ec6e3621 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -5,31 +5,53 @@ import os import subprocess -from leakpro.utils.import_helper import Self +from leakpro.metrics.attack_result import GIAResults, MIAResult +from leakpro.synthetic_data_attacks.singling_out_utils import SinglingOutResults +from leakpro.utils.import_helper import Self, Union +from leakpro.utils.logger import setup_logger # Report Handler class ReportHandler(): """Implementation of the 
report handler.""" - def __init__(self:Self, report_dir: str, logger:logging.Logger) -> None: - self.logger = logger - self.report_dir = report_dir + def __init__(self:Self, report_dir: str = None, logger:logging.Logger = None) -> None: + self.logger = setup_logger() if logger is None else logger + self.logger.info("Initializing report handler...") + + self.report_dir = self._try_find_rep_dir() if report_dir is None else report_dir + self.logger.info(f"report_dir set to: {self.report_dir}") + self.pdf_results = {} self.leakpro_types = ["MIAResult", "GIAResults", - "SyntheticResult" + "SinglingOutResults", ] # Initiate empty lists for the different types of LeakPro attack types for key in self.leakpro_types: self.pdf_results[key] = [] - def save_results(self:Self, attack_name: str, result_data: dict, config: dict) -> None: + def _try_find_rep_dir(self): + save_path = "../leakpro_output/results" + # Check if path exists, otherwise create it. + for _ in range(3): + if os.path.exists(save_path): + return save_path + save_path = "../"+save_path + + # If no result folder can be found + if not os.path.exists(save_path): + save_path = "../../leakpro_output/results" + os.makedirs(save_path) + return save_path + + + def save_results(self:Self, attack_name: str = None, result_data: Union[MIAResult, GIAResults, SinglingOutResults] = None, config: dict = None) -> None: """Save method for results.""" self.logger.info(f"Saving results for {attack_name}") - result_data.save(self.report_dir, attack_name, config) + result_data.save(path=self.report_dir, name=attack_name, config=config) def load_results(self:Self) -> None: """Load method for results.""" @@ -54,136 +76,81 @@ def load_results(self:Self) -> None: raise ValueError(f"Class '{resulttype}' not found.") # Initialize the class using the saved primitives - instance = cls(load=True) - instance.load(data) + # instance = cls(load=True) + data["id"] = subdir.name + instance = cls.load(data) - if instance.id is None: - instance.id 
= subdir.name + # if instance.id is None: + # instance.id = subdir.name - if instance.resultname is None: - instance.resultname = parentdir.name + # if instance.resultname is None: + # instance.resultname = parentdir.name self.results.append(instance) except Exception as e: self.logger.info(f"Not able to load data, Error: {e}") - def _get_results_of_name(self:Self, results: list, resultname_value: str) -> list: - indices = [idx for (idx, result) in enumerate(results) if result.resultname == resultname_value] - return [results[idx] for idx in indices] - - def _get_all_attacknames(self:Self) -> list: - attack_name_list = [] - for result in self.results: - if result.resultname not in attack_name_list: - attack_name_list.append(result.resultname) - return attack_name_list - - def create_results_all(self:Self) -> None: + def create_results( + self:Self, + types: list = [], + ) -> None: """Result method to group all attacks.""" - for result_type in self.leakpro_types: - try: - # Get all results of type "Result" - results = [res for res in self.results if res.resulttype == result_type] - - # If no results of type "result_type" is found, skip to next result_type - if not results: - self.logger.info(f"No results of type {result_type} found.") - continue - - # Check if the result type has a 'create_results' method - try: - result_class = globals().get(result_type) - except Exception as e: - self.logger.info(f"No {result_type} class could be found or exists. 
Error: {e}") - continue - - if hasattr(result_class, "create_results") and callable(result_class.create_results): - - # Create all results - merged_result = result_class.create_results(results=results, - save_dir=self.report_dir, - save_name="all_results") - self.pdf_results[result_type].append(merged_result) - - except Exception as e: - self.logger.info(f"Error in results all: {e}") + for result_type in types: + # try: + # Get all results of type "Result" + # results = [res for res in self.results if res.resulttype == result_type] + results = [res for res in self.results if res.__class__.__name__ == result_type] - def create_results_strong(self:Self) -> None: - """Result method for grouping the strongest attacks.""" + # If no results of type "result_type" is found, skip to next result_type + if not results: + self.logger.info(f"No results of type {result_type} found.") + continue - for result_type in self.leakpro_types: + # Check if the result type has a 'create_results' method try: - # Get all results of type "Result" - results = [res for res in self.results if res.resulttype == result_type] - - # If no results of type "result_type" is found, skip to next result_type - if not results: - self.logger.info(f"No 'strong' results of type {result_type} found.") - continue - - try: - result_class = globals().get(result_type) - except Exception as e: - self.logger.info(f"No {result_type} class could be found or exists. 
Error: {e}") - continue - - # Get all attack names - attack_name_grouped_results = [self._get_results_of_name(results, name) for\ - name in self._get_all_attacknames()] - - # Get the strongest result for each attack name - if hasattr(result_class, "get_strongest") and callable(result_class.get_strongest): - strongest_results = [result_class.get_strongest(result) for result in \ - attack_name_grouped_results] - - # Create the strongest results - merged_result = result_class.create_results(results=strongest_results, - save_dir=self.report_dir, - save_name="strong_results") - self.pdf_results[result_type].append(merged_result) - + result_class = globals().get(result_type) except Exception as e: - self.logger.info(f"Error in results strong: {e}") - - def create_results_attackname_grouped(self:Self) -> None: - """Result method for grouping attacks by name.""" - - # Get all attack names - all_attack_names = self._get_all_attacknames() - - for result_type in self.leakpro_types: - - # Get all results of type "Result" - results = [res for res in self.results if res.resulttype == result_type] - - # If no results of type "result_type" is found, skip to next result_type - if not results: - self.logger.info(f"No results of type {result_type} to group.") - continue - - # Check if the result type has a 'create_results' method - try: - result_class = globals().get(result_type) - except Exception as e: - self.logger.info(f"No {result_type} class could be found or exists. 
Error: {e}") - continue - - for name in all_attack_names: - if hasattr(result_class, "create_results") and callable(result_class.create_results): - try: - # Get result for each attack names - attack_results = self._get_results_of_name(results, name) - - # Create results - merged_result = result_class.create_results(results=attack_results, - save_dir=self.report_dir, - save_name="grouped_"+name) - self.pdf_results[result_type].append(merged_result) - - except Exception as e: - self.logger.info(f"Error in results grouped: {e}") + self.logger.info(f"No {result_type} class could be found or exists. Error: {e}") + continue + + if hasattr(result_class, "create_results") and callable(result_class.create_results): + + # Create all results + latex_results = result_class.create_results(results=results, + save_dir=self.report_dir, + ) + self.pdf_results[result_type].append(latex_results) + + # except Exception as e: + # self.logger.info(f"Error in results all: {result_class}, {e}") + + def create_results_all( + self:Self, + ) -> None: + """Method to create all types of results.""" + self.create_results(types=self.leakpro_types) + + def create_results_mia( + self:Self, + ) -> None: + """Method to create MIAResult results.""" + self.create_results(types=["MIAResult"]) + + def create_results_gia( + self:Self, + ) -> None: + """Method to create GIAResults results.""" + self.create_results(types=["GIAResults"]) + + def create_results_syn( + self:Self, + ) -> None: + """Method to create Synthetic results.""" + self.create_results(types=["SinglingOutResults", + "InferenceResults", + "LinkabilityResults"]) def create_report(self:Self) -> None: """Method to create PDF report.""" @@ -196,7 +163,7 @@ def create_report(self:Self) -> None: if len(self.pdf_results[result_type]) > 0: self.latex_content += f"""\\section{{{result_type}}}""" for res in self.pdf_results[result_type]: - self.latex_content += res + self.latex_content += res # Compile the PDF self._compile_pdf() @@ -226,10 +193,11 
@@ def _compile_pdf(self:Self) -> None: self.logger.info("Could not find pdflatex installed\ \nPlease install pdflatex with apt install texlive-latex-base") - cmd = ["pdflatex", "-interaction", "nonstopmode", f"{self.report_dir}/LeakPro_output.tex"] - proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL) # noqa: S603 + cmd = ["pdflatex", "-interaction", "nonstopmode", "LeakPro_output.tex"] + proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, cwd=f"{self.report_dir}") # noqa: S603 proc.communicate() self.logger.info("PDF compiled") except Exception as e: self.logger.info(f"Could not compile PDF: {e}") + self.logger.info("Make sure to install pdflatex with apt install texlive-latex-base") diff --git a/leakpro/synthetic_data_attacks/plots.py b/leakpro/synthetic_data_attacks/plots.py index 8e7305f4..4540f474 100755 --- a/leakpro/synthetic_data_attacks/plots.py +++ b/leakpro/synthetic_data_attacks/plots.py @@ -189,7 +189,13 @@ def plot_ir_base_case(*, inf_res: InferenceResults, high_res_flag: bool = True) plt.tight_layout() plt.show() -def plot_singling_out(*, sin_out_res: SinglingOutResults, high_res_flag: bool = True) -> None: +def plot_singling_out(*, + sin_out_res: SinglingOutResults, + high_res_flag: bool = True, + show: bool = True, + save: bool = False, + save_name: str = None + ) -> None: """Function to plot singling out given results. Note: function is not tested and is used in examples. 
@@ -225,5 +231,11 @@ def plot_singling_out(*, sin_out_res: SinglingOutResults, high_res_flag: bool = set_ticks(ax=ax, xlabels=set_n_cols) # Adding legend set_legend(ax=ax) + + if save: + plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") # Show plot - plt.show() + if show: + plt.show() + else: + plt.clf() diff --git a/leakpro/synthetic_data_attacks/singling_out_utils.py b/leakpro/synthetic_data_attacks/singling_out_utils.py index 6db9ffa0..f3c302ef 100755 --- a/leakpro/synthetic_data_attacks/singling_out_utils.py +++ b/leakpro/synthetic_data_attacks/singling_out_utils.py @@ -1,7 +1,9 @@ """Singling-out risk util functions.""" +import json import multiprocessing as mp +import os from itertools import repeat -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Self, Tuple, Union from pandas import DataFrame from pydantic import BaseModel @@ -24,6 +26,103 @@ class SinglingOutResults(BaseModel): res_cols: List[str] res: List[List[Union[int,float]]] + prefix: str + dataset: str + + def save(self:Self, path:str = "../leakpro_output/results/", name: str = "singling_out", config:dict = None) -> None: # noqa: ARG002 + """Save method for SinglingOutResults.""" + + id = f"{self.prefix}"+f"_{self.dataset}" + + # Data to be saved + data = { + "resulttype": self.__class__.__name__, + "resultname": name, + "res": self.model_dump(), + "id": id, + } + + # Check if path exists, otherwise create it. 
+ for _ in range(3): + if os.path.exists(path): + break + path = "../"+path + + # If no result folder can be found + if not os.path.exists(path): + os.makedirs("../../leakpro_output/results/") + + # Save the results to a file + if not os.path.exists(f"{path}/{name}/{id}"): + os.makedirs(f"{path}/{name}/{id}") + + with open(f"{path}/{name}/{id}/data.json", "w") as f: + json.dump(data, f) + + from leakpro.synthetic_data_attacks.plots import plot_singling_out + plot_singling_out(sin_out_res=SinglingOutResults(res=self.res, + res_cols=self.res_cols, + prefix=self.prefix, + dataset=self.dataset), + show=False, + save=True, + save_name=f"{path}/{name}/{id}/{self.prefix}", + ) + + @staticmethod + def load(data: dict) -> None: + """Load method for SinglingOutResults.""" + return SinglingOutResults(res=data["res"]["res"], + res_cols=data["res"]["res_cols"], + dataset=data["res"]["dataset"], + prefix=data["res"]["prefix"] + ) + + def plot(self:Self, + high_res_flag:bool = False, + show:bool = True, + save:bool = False, + save_path:str = "./", + save_name:str = "fig.png", + ) -> None: + """Plot method for SinglingOutResults.""" + from leakpro.synthetic_data_attacks.plots import plot_singling_out + plot_singling_out(sin_out_res=SinglingOutResults(res=self.res, + res_cols=self.res_cols, + prefix=self.prefix, + dataset=self.dataset), + high_res_flag=high_res_flag, + show = show, + save = save, + save_name = f"{save_path}/{save_name}", + ) + + @staticmethod + def create_results( + results: list, + save_dir: str = "./", + ) -> str: + """Result method for SinglingOutResults.""" + latex = "" + + def _latex( + save_dir: str, + save_name: str, + ) -> str: + """Latex method for SinglingOutResults.""" + + filename = f"{save_dir}/{save_name}.png" + return f""" + \\subsection{{{" ".join(save_name.split("_"))}}} + \\begin{{figure}}[ht] + \\includegraphics[width=0.8\\textwidth]{{{filename}}} + \\caption{{Original}} + \\end{{figure}} + """ + for res in results: + res.plot(show=False, 
save=True, save_path=save_dir, save_name=res.prefix) + latex += _latex(save_dir=save_dir, save_name=res.prefix) + return latex def check_for_int_value(*, x: int) -> None: """Auxiliary function to check a given integer value.""" @@ -48,7 +147,7 @@ def aux_singling_out_risk_evaluation(**kwargs: Any) -> Tuple[Optional[Union[int, verbose = kwargs.pop("verbose") #Get n_cols n_cols = kwargs["n_cols"] - #Return non if n_cols==2 + #Return non if n'_cols==2 #Note: this is because n_cols==2 takes A LOT of time. Seems algorithm is not good for predicates with len==2 if n_cols == 2: return None, None @@ -155,7 +254,9 @@ def singling_out_risk_evaluation( #Instantiate SinglingOutResults sin_out_res = SinglingOutResults( res_cols = res_cols, - res = res + res = res, + prefix = get_singling_out_prefix(n_cols=n_cols), + dataset = dataset, ) #Save results to json if save_results_json: From b9117633f5853ab0704c8b92096e736af799630c Mon Sep 17 00:00:00 2001 From: henrikfo Date: Wed, 20 Nov 2024 00:39:09 +0000 Subject: [PATCH 08/14] added example ... 
--- .../___report_handler_anomalies.ipynb | 418 ++++++++++++++++++ .../report_handler_anomalies.ipynb | 203 +++++++++ 2 files changed, 621 insertions(+) create mode 100644 examples/report_handler/___report_handler_anomalies.ipynb create mode 100644 examples/report_handler/report_handler_anomalies.ipynb diff --git a/examples/report_handler/___report_handler_anomalies.ipynb b/examples/report_handler/___report_handler_anomalies.ipynb new file mode 100644 index 00000000..3411063a --- /dev/null +++ b/examples/report_handler/___report_handler_anomalies.ipynb @@ -0,0 +1,418 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "95d5acad-514e-4950-94a0-c80d789d9364", + "metadata": {}, + "source": [ + "# Report handler examples" + ] + }, + { + "cell_type": "markdown", + "id": "71f5dbe9", + "metadata": {}, + "source": [ + "Install leakpro as ``` pip install -e /path/to/leakpro ```" + ] + }, + { + "cell_type": "markdown", + "id": "68b48ce8", + "metadata": {}, + "source": [ + "### Synthetic examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bcf529c7-8bfe-49da-9889-59111ec2cd73", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "import pandas as pd\n", + "\n", + "sys.path.append(\"../..\")\n", + "\n", + "from leakpro.synthetic_data_attacks import plots\n", + "from leakpro.synthetic_data_attacks.anomalies import return_anomalies\n", + "from leakpro.synthetic_data_attacks.inference_utils import inference_risk_evaluation\n", + "from leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", + "from leakpro.synthetic_data_attacks.singling_out_utils import singling_out_risk_evaluation\n", + "# from leakpro.metrics.attack_result import SyntheticResult\n", + "\n", + "#Get ori and syn\n", + "n_samples = 100\n", + "DATA_PATH = \"../synthetic_data/datasets/\"\n", + "ori = pd.read_csv(os.path.join(DATA_PATH, \"adults_ori.csv\"), nrows=n_samples)\n", + "syn = 
pd.read_csv(os.path.join(DATA_PATH, \"adults_syn.csv\"), nrows=n_samples)" + ] + }, + { + "cell_type": "markdown", + "id": "62c44504-a5fa-4846-8132-53877f369825", + "metadata": {}, + "source": [ + "### Get anomalies of synthetic data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0d25b9e7-03a7-4320-a456-6153774cb82c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=64)]: Using backend ThreadingBackend with 64 concurrent workers.\n", + "[Parallel(n_jobs=64)]: Done 2 out of 64 | elapsed: 0.9s remaining: 28.1s\n", + "[Parallel(n_jobs=64)]: Done 64 out of 64 | elapsed: 4.1s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unique predictions (array([-1, 1]), array([ 2, 98]))\n", + "Syn anom shape (2, 14)\n" + ] + } + ], + "source": [ + "syn_anom = return_anomalies(df=syn, n_estimators=1000, n_jobs=-1, verbose=True)\n", + "print(\"Syn anom shape\",syn_anom.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ad69ece9", + "metadata": {}, + "outputs": [], + "source": [ + "sin_out_res = singling_out_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " n_attacks = syn_anom.shape[0]\n", + ")\n", + "# save_path = sin_out_res.save()\n", + "# print(save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5e8fe664", + "metadata": {}, + "outputs": [], + "source": [ + "# from leakpro.synthetic_data_attacks.singling_out_utils import SinglingOutResults\n", + "# import json\n", + "\n", + "# with open(\"../../leakpro_output/results/singling_out/singling_out_n_cols_all_adults/data.json\") as f:\n", + "# data = json.load(f)\n", + "# syn_loaded = SinglingOutResults.load(data=data)\n", + " \n", + "# syn_loaded.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "373dcc8a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "2024-11-20 00:09:42,566 INFO Initializing report handler...\n", + "2024-11-20 00:09:42,567 INFO report_dir set to: ../../leakpro_output/results\n", + "2024-11-20 00:09:42,569 INFO Saving results for singling_out\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../leakpro_output/results\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from leakpro.reporting.report_handler import ReportHandler\n", + "report_handler = ReportHandler()\n", + "\n", + "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1d91c7e0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-19 23:38:04,520 INFO No results of type GIAResults found.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../leakpro_output/results\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-19 23:38:14,966 INFO PDF compiled\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "report_handler.load_results()\n", + "report_handler.create_results_all()\n", + "\n", + "report_handler.create_report()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89fcbe8a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "f0f7288c-a380-43df-97cb-fb0152e410a2", + "metadata": {}, + "source": [ + "### Singling-out risk analysis, Linkability riks analysis with anomalies and Inference risk, worst and base case" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "49ff9bb3-e7f4-4d3a-8759-382eed697893", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Singling-out\n", + "sin_out_res = singling_out_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " n_attacks = syn_anom.shape[0]\n", + ")\n", + "SyntheticResult.plot(res=sin_out_res,\n", + " high_res_flag=False,\n", + " save=True,\n", + " save_path=\"./outputs\",\n", + " save_name=\"Singling-out\"\n", + " )\n", + "\n", + "# Linkability\n", + "link_res = linkability_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " n_samples = syn_anom.shape[0],\n", + " n_attacks = 100\n", + ")\n", + "SyntheticResult.plot(res=link_res,\n", + " high_res_flag=False,\n", + " save=True,\n", + " save_path=\"./outputs\",\n", + " save_name=\"Linkability\"\n", + " )\n", + "\n", + "# Inference risk, base case\n", + "inf_res = inference_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " worst_case_flag = False,\n", + " n_attacks = syn_anom.shape[0]\n", + ")\n", + "SyntheticResult.plot(res=inf_res,\n", + " high_res_flag=False,\n", + " case_flag=\"base\",\n", + " save=True,\n", + " save_path=\"./outputs\",\n", + " save_name=\"Inference_base_case\"\n", + " )\n", + "\n", + "# Inference risk, worst case\n", + "inf_res_worst = inference_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " worst_case_flag = True,\n", + " n_attacks = syn_anom.shape[0]\n", + ")\n", + "SyntheticResult.plot(res=inf_res_worst,\n", + " high_res_flag=False,\n", + " case_flag=\"worst\",\n", + " save=True,\n", + " save_path=\"./outputs\",\n", + " save_name=\"Inference_worst_case\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "9f9e01d4", + "metadata": {}, + "source": [ + "## Save and store results" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0e0c6c09", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + 
"evalue": "ReportHandler.__init__() missing 1 required positional argument: 'logger'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[11], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(path)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mleakpro\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreporting\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreport_handler\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReportHandler\n\u001b[0;32m----> 7\u001b[0m reporthandler \u001b[38;5;241m=\u001b[39m \u001b[43mReportHandler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mreport_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m reporthandler\u001b[38;5;241m.\u001b[39msave_results(\n\u001b[1;32m 12\u001b[0m attack_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msynthetic_inference\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 13\u001b[0m result_data\u001b[38;5;241m=\u001b[39minf_res,\n\u001b[1;32m 14\u001b[0m config\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcase\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbase\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 15\u001b[0m )\n", + "\u001b[0;31mTypeError\u001b[0m: ReportHandler.__init__() missing 1 required positional argument: 'logger'" + ] + } + ], + "source": [ + "path = \"../../leakpro_output\"\n", + "if not os.path.exists(path):\n", + " os.makedirs(path)\n", + " \n", + "from leakpro.reporting.report_handler import ReportHandler\n", + "\n", + "reporthandler = 
ReportHandler(\n", + " report_dir=path,\n", + " )\n", + "\n", + "reporthandler.save_results(\n", + " attack_name=\"synthetic_inference\",\n", + " result_data=inf_res,\n", + " config={\"case\": \"base\"}\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f9c3942", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/report_handler/report_handler_anomalies.ipynb b/examples/report_handler/report_handler_anomalies.ipynb new file mode 100644 index 00000000..a32fbdda --- /dev/null +++ b/examples/report_handler/report_handler_anomalies.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "95d5acad-514e-4950-94a0-c80d789d9364", + "metadata": {}, + "source": [ + "# Report handler examples" + ] + }, + { + "cell_type": "markdown", + "id": "71f5dbe9", + "metadata": {}, + "source": [ + "Install leakpro as ``` pip install -e /path/to/leakpro ```" + ] + }, + { + "cell_type": "markdown", + "id": "68b48ce8", + "metadata": {}, + "source": [ + "### Synthetic examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bcf529c7-8bfe-49da-9889-59111ec2cd73", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "import pandas as pd\n", + "\n", + "sys.path.append(\"../..\")\n", + "\n", + "from leakpro.synthetic_data_attacks import plots\n", + "from leakpro.synthetic_data_attacks.anomalies import return_anomalies\n", + "from leakpro.synthetic_data_attacks.inference_utils import inference_risk_evaluation\n", + "from 
leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", + "from leakpro.synthetic_data_attacks.singling_out_utils import singling_out_risk_evaluation\n", + "# from leakpro.metrics.attack_result import SyntheticResult\n", + "\n", + "#Get ori and syn\n", + "n_samples = 100\n", + "DATA_PATH = \"../synthetic_data/datasets/\"\n", + "ori = pd.read_csv(os.path.join(DATA_PATH, \"adults_ori.csv\"), nrows=n_samples)\n", + "syn = pd.read_csv(os.path.join(DATA_PATH, \"adults_syn.csv\"), nrows=n_samples)" + ] + }, + { + "cell_type": "markdown", + "id": "62c44504-a5fa-4846-8132-53877f369825", + "metadata": {}, + "source": [ + "### Get anomalies of synthetic data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad69ece9", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a result\n", + "\n", + "syn_anom = return_anomalies(df=syn, n_estimators=1000, n_jobs=-1, verbose=True)\n", + "print(\"Syn anom shape\",syn_anom.shape)\n", + "\n", + "sin_out_res = singling_out_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " n_attacks = syn_anom.shape[0]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "373dcc8a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-20 00:09:42,566 INFO Initializing report handler...\n", + "2024-11-20 00:09:42,567 INFO report_dir set to: ../../leakpro_output/results\n", + "2024-11-20 00:09:42,569 INFO Saving results for singling_out\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../leakpro_output/results\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Import and initialize ReportHandler\n", + "from leakpro.reporting.report_handler import ReportHandler\n", + "report_handler = ReportHandler()\n", + "\n", + "# Save the result using the ReportHandler\n", + "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d91c7e0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-19 23:38:04,520 INFO No results of type GIAResults found.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "../../leakpro_output/results\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-19 23:38:14,966 INFO PDF compiled\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Use the ReportHandler and load all the saved results\n", + "report_handler.load_results()\n", + "\n", + "# Create results and collect corresponding latex texts\n", + "report_handler.create_results_all()\n", + "\n", + "# Create the report by compiling the latex text\n", + "report_handler.create_report()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 91882483701c950855a57e781a2bd0362a3e4e95 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Fri, 22 Nov 2024 11:59:03 +0000 Subject: [PATCH 09/14] Added MIAResults to all mia attacks --- leakpro/attacks/mia_attacks/HSJ.py | 8 ++++---- leakpro/attacks/mia_attacks/attack_p.py | 4 ++-- leakpro/attacks/mia_attacks/loss_trajectory.py | 10 +++++----- leakpro/attacks/mia_attacks/qmia.py | 6 +++--- leakpro/attacks/mia_attacks/rmia.py | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/leakpro/attacks/mia_attacks/HSJ.py b/leakpro/attacks/mia_attacks/HSJ.py index 166572f1..f8a946d8 100755 --- a/leakpro/attacks/mia_attacks/HSJ.py +++ b/leakpro/attacks/mia_attacks/HSJ.py @@ -4,7 +4,7 @@ from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA from leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.metrics.attack_result import MIAResult from leakpro.signals.signal import HopSkipJumpDistance from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger @@ -169,12 +169,12 @@ def prepare_attack(self:Self) -> None: - def 
run_attack(self:Self) -> CombinedMetricResult: + def run_attack(self:Self) -> MIAResult: """Run the attack and return the combined metric result. Returns ------- - CombinedMetricResult: The combined metric result containing predicted labels, true labels, + MIAResult: The Result containing predicted labels, true labels, predictions probabilities, and signal values. """ @@ -217,7 +217,7 @@ def run_attack(self:Self) -> CombinedMetricResult: ) # compute ROC, TP, TN etc - return CombinedMetricResult( + return MIAResult( predicted_labels=member_preds, true_labels=true_labels, predictions_proba=None, diff --git a/leakpro/attacks/mia_attacks/attack_p.py b/leakpro/attacks/mia_attacks/attack_p.py index baa0bc6a..7ff4d3c9 100755 --- a/leakpro/attacks/mia_attacks/attack_p.py +++ b/leakpro/attacks/mia_attacks/attack_p.py @@ -5,7 +5,7 @@ from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA from leakpro.attacks.utils.threshold_computation import linear_itp_threshold_func from leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.metrics.attack_result import MIAResult from leakpro.signals.signal import ModelLoss from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger @@ -136,7 +136,7 @@ def run_attack(self:Self) -> CombinedMetricResult: ) # compute ROC, TP, TN etc - return CombinedMetricResult( + return MIAResult( predicted_labels=predictions, true_labels=true_labels, predictions_proba=None, diff --git a/leakpro/attacks/mia_attacks/loss_trajectory.py b/leakpro/attacks/mia_attacks/loss_trajectory.py index 5e78ee0e..072b5aa9 100755 --- a/leakpro/attacks/mia_attacks/loss_trajectory.py +++ b/leakpro/attacks/mia_attacks/loss_trajectory.py @@ -13,7 +13,7 @@ from leakpro.attacks.utils.distillation_model_handler import DistillationModelHandler from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler from 
leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.metrics.attack_result import MIAResult from leakpro.signals.signal import ModelLogits from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger @@ -402,7 +402,7 @@ def mia_attack(self:Self, attack_model:nn.Module) -> tuple: return auc_ground_truth, member_preds - def run_attack(self:Self) -> CombinedMetricResult: + def run_attack(self:Self) -> MIAResult: """Run the attack and return the combined metric result. Returns @@ -418,9 +418,9 @@ def run_attack(self:Self) -> CombinedMetricResult: signals = np.random.rand(*true_labels.shape) # compute ROC, TP, TN etc - return CombinedMetricResult( - predicted_labels= predictions, + return MIAResult( + predicted_labels=predictions, true_labels=true_labels, predictions_proba=None, signal_values=signals, - ) + ) \ No newline at end of file diff --git a/leakpro/attacks/mia_attacks/qmia.py b/leakpro/attacks/mia_attacks/qmia.py index aa297a1c..9628a1d9 100755 --- a/leakpro/attacks/mia_attacks/qmia.py +++ b/leakpro/attacks/mia_attacks/qmia.py @@ -10,7 +10,7 @@ from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA from leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.metrics.attack_result import MIAResult from leakpro.signals.signal import ModelRescaledLogits from leakpro.utils.import_helper import Any, Self, Tuple from leakpro.utils.logger import logger @@ -285,7 +285,7 @@ def _train_quantile_regressor( # Move the model back to the CPU self.quantile_regressor.to("cpu") - def run_attack(self:Self) -> CombinedMetricResult: + def run_attack(self:Self) -> MIAResult: """Run the attack on the target model and dataset. 
Args: @@ -329,7 +329,7 @@ def run_attack(self:Self) -> CombinedMetricResult: ) # compute ROC, TP, TN etc - return CombinedMetricResult( + return MIAResult( predicted_labels=predictions, true_labels=true_labels, predictions_proba=None, diff --git a/leakpro/attacks/mia_attacks/rmia.py b/leakpro/attacks/mia_attacks/rmia.py index 9c78d98e..f46b6fea 100755 --- a/leakpro/attacks/mia_attacks/rmia.py +++ b/leakpro/attacks/mia_attacks/rmia.py @@ -6,7 +6,7 @@ from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler from leakpro.attacks.utils.utils import softmax_logits from leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult +from leakpro.metrics.attack_result import MIAResult from leakpro.signals.signal import ModelLogits from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger @@ -279,7 +279,7 @@ def _offline_attack(self:Self) -> None: self.in_member_signals = score[in_members].reshape(-1,1) self.out_member_signals = score[out_members].reshape(-1,1) - def run_attack(self:Self) -> CombinedMetricResult: + def run_attack(self:Self) -> MIAResult: """Run the attack on the target model and dataset. 
Returns @@ -315,7 +315,7 @@ def run_attack(self:Self) -> CombinedMetricResult: ) # compute ROC, TP, TN etc - return CombinedMetricResult( + return MIAResult( predicted_labels=predictions, true_labels=true_labels, predictions_proba=None, From 2baec49174bc1cb150d5f07f53cb8ae9eae4a381 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Mon, 2 Dec 2024 01:03:40 +0000 Subject: [PATCH 10/14] pre-merge --- .../___report_handler_anomalies.ipynb | 418 ------------- examples/report_handler/gia_utils/cifar.py | 26 + examples/report_handler/gia_utils/model.py | 81 +++ examples/report_handler/mia_utils/audit.yaml | 56 ++ .../report_handler/mia_utils/cifar_handler.py | 67 +++ .../mia_utils/train_config.yaml | 17 + .../mia_utils/utils/cifar_data_preparation.py | 111 ++++ .../utils/cifar_model_preparation.py | 117 ++++ examples/report_handler/report_handler.ipynb | 562 ++++++++++++++++++ .../report_handler_anomalies.ipynb | 203 ------- .../attacks/gia_attacks/invertinggradients.py | 4 +- leakpro/attacks/mia_attacks/attack_p.py | 2 +- leakpro/attacks/mia_attacks/lira.py | 8 +- .../attacks/mia_attacks/loss_trajectory.py | 2 +- leakpro/leakpro.py | 21 +- leakpro/metrics/attack_result.py | 495 +++------------ leakpro/reporting/report_handler.py | 79 +-- leakpro/run.py | 3 +- .../synthetic_data_attacks/inference_utils.py | 102 +++- .../linkability_utils.py | 79 ++- leakpro/synthetic_data_attacks/plots.py | 61 +- .../singling_out_utils.py | 2 +- leakpro/synthetic_data_attacks/utils.py | 22 +- 23 files changed, 1429 insertions(+), 1109 deletions(-) delete mode 100644 examples/report_handler/___report_handler_anomalies.ipynb create mode 100644 examples/report_handler/gia_utils/cifar.py create mode 100644 examples/report_handler/gia_utils/model.py create mode 100644 examples/report_handler/mia_utils/audit.yaml create mode 100644 examples/report_handler/mia_utils/cifar_handler.py create mode 100644 examples/report_handler/mia_utils/train_config.yaml create mode 100644 
examples/report_handler/mia_utils/utils/cifar_data_preparation.py create mode 100644 examples/report_handler/mia_utils/utils/cifar_model_preparation.py create mode 100644 examples/report_handler/report_handler.ipynb delete mode 100644 examples/report_handler/report_handler_anomalies.ipynb diff --git a/examples/report_handler/___report_handler_anomalies.ipynb b/examples/report_handler/___report_handler_anomalies.ipynb deleted file mode 100644 index 3411063a..00000000 --- a/examples/report_handler/___report_handler_anomalies.ipynb +++ /dev/null @@ -1,418 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "95d5acad-514e-4950-94a0-c80d789d9364", - "metadata": {}, - "source": [ - "# Report handler examples" - ] - }, - { - "cell_type": "markdown", - "id": "71f5dbe9", - "metadata": {}, - "source": [ - "Install leakpro as ``` pip install -e /path/to/leakpro ```" - ] - }, - { - "cell_type": "markdown", - "id": "68b48ce8", - "metadata": {}, - "source": [ - "### Synthetic examples" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "bcf529c7-8bfe-49da-9889-59111ec2cd73", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "\n", - "import pandas as pd\n", - "\n", - "sys.path.append(\"../..\")\n", - "\n", - "from leakpro.synthetic_data_attacks import plots\n", - "from leakpro.synthetic_data_attacks.anomalies import return_anomalies\n", - "from leakpro.synthetic_data_attacks.inference_utils import inference_risk_evaluation\n", - "from leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", - "from leakpro.synthetic_data_attacks.singling_out_utils import singling_out_risk_evaluation\n", - "# from leakpro.metrics.attack_result import SyntheticResult\n", - "\n", - "#Get ori and syn\n", - "n_samples = 100\n", - "DATA_PATH = \"../synthetic_data/datasets/\"\n", - "ori = pd.read_csv(os.path.join(DATA_PATH, \"adults_ori.csv\"), nrows=n_samples)\n", - "syn = pd.read_csv(os.path.join(DATA_PATH, 
\"adults_syn.csv\"), nrows=n_samples)" - ] - }, - { - "cell_type": "markdown", - "id": "62c44504-a5fa-4846-8132-53877f369825", - "metadata": {}, - "source": [ - "### Get anomalies of synthetic data" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "0d25b9e7-03a7-4320-a456-6153774cb82c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=64)]: Using backend ThreadingBackend with 64 concurrent workers.\n", - "[Parallel(n_jobs=64)]: Done 2 out of 64 | elapsed: 0.9s remaining: 28.1s\n", - "[Parallel(n_jobs=64)]: Done 64 out of 64 | elapsed: 4.1s finished\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unique predictions (array([-1, 1]), array([ 2, 98]))\n", - "Syn anom shape (2, 14)\n" - ] - } - ], - "source": [ - "syn_anom = return_anomalies(df=syn, n_estimators=1000, n_jobs=-1, verbose=True)\n", - "print(\"Syn anom shape\",syn_anom.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ad69ece9", - "metadata": {}, - "outputs": [], - "source": [ - "sin_out_res = singling_out_risk_evaluation(\n", - " dataset = \"adults\",\n", - " ori = ori,\n", - " syn = syn_anom,\n", - " n_attacks = syn_anom.shape[0]\n", - ")\n", - "# save_path = sin_out_res.save()\n", - "# print(save_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "5e8fe664", - "metadata": {}, - "outputs": [], - "source": [ - "# from leakpro.synthetic_data_attacks.singling_out_utils import SinglingOutResults\n", - "# import json\n", - "\n", - "# with open(\"../../leakpro_output/results/singling_out/singling_out_n_cols_all_adults/data.json\") as f:\n", - "# data = json.load(f)\n", - "# syn_loaded = SinglingOutResults.load(data=data)\n", - " \n", - "# syn_loaded.plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "373dcc8a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-11-20 
00:09:42,566 INFO Initializing report handler...\n", - "2024-11-20 00:09:42,567 INFO report_dir set to: ../../leakpro_output/results\n", - "2024-11-20 00:09:42,569 INFO Saving results for singling_out\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "../../leakpro_output/results\n" - ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from leakpro.reporting.report_handler import ReportHandler\n", - "report_handler = ReportHandler()\n", - "\n", - "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1d91c7e0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-11-19 23:38:04,520 INFO No results of type GIAResults found.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "../../leakpro_output/results\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-11-19 23:38:14,966 INFO PDF compiled\n" - ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "report_handler.load_results()\n", - "report_handler.create_results_all()\n", - "\n", - "report_handler.create_report()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "89fcbe8a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "f0f7288c-a380-43df-97cb-fb0152e410a2", - "metadata": {}, - "source": [ - "### Singling-out risk analysis, Linkability riks analysis with anomalies and Inference risk, worst and base case" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "49ff9bb3-e7f4-4d3a-8759-382eed697893", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Singling-out\n", - "sin_out_res = singling_out_risk_evaluation(\n", - " dataset = \"adults\",\n", - " ori = ori,\n", - " syn = syn_anom,\n", - " n_attacks = syn_anom.shape[0]\n", - ")\n", - "SyntheticResult.plot(res=sin_out_res,\n", - " high_res_flag=False,\n", - " save=True,\n", - " save_path=\"./outputs\",\n", - " save_name=\"Singling-out\"\n", - " )\n", - "\n", - "# Linkability\n", - "link_res = linkability_risk_evaluation(\n", - " dataset = \"adults\",\n", - " ori = ori,\n", - " syn = syn_anom,\n", - " n_samples = syn_anom.shape[0],\n", - " n_attacks = 100\n", - ")\n", - "SyntheticResult.plot(res=link_res,\n", - " high_res_flag=False,\n", - " save=True,\n", - " save_path=\"./outputs\",\n", - " save_name=\"Linkability\"\n", - " )\n", - "\n", - "# Inference risk, base case\n", - "inf_res = inference_risk_evaluation(\n", - " dataset = \"adults\",\n", - " ori = ori,\n", - " syn = syn_anom,\n", - " worst_case_flag = False,\n", - " n_attacks = syn_anom.shape[0]\n", - ")\n", - "SyntheticResult.plot(res=inf_res,\n", - " high_res_flag=False,\n", - " case_flag=\"base\",\n", - " save=True,\n", - " save_path=\"./outputs\",\n", - " save_name=\"Inference_base_case\"\n", - " )\n", - "\n", - "# Inference risk, worst case\n", - "inf_res_worst = inference_risk_evaluation(\n", - " dataset = \"adults\",\n", - " ori = ori,\n", - " syn = syn_anom,\n", - " worst_case_flag = True,\n", - " n_attacks = syn_anom.shape[0]\n", - ")\n", - "SyntheticResult.plot(res=inf_res_worst,\n", - " high_res_flag=False,\n", - " case_flag=\"worst\",\n", - " save=True,\n", - " save_path=\"./outputs\",\n", - " save_name=\"Inference_worst_case\"\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "9f9e01d4", - "metadata": {}, - "source": [ - "## Save and store results" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "0e0c6c09", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - 
"evalue": "ReportHandler.__init__() missing 1 required positional argument: 'logger'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 7\u001b[0m\n\u001b[1;32m 3\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(path)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mleakpro\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreporting\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreport_handler\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ReportHandler\n\u001b[0;32m----> 7\u001b[0m reporthandler \u001b[38;5;241m=\u001b[39m \u001b[43mReportHandler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mreport_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m reporthandler\u001b[38;5;241m.\u001b[39msave_results(\n\u001b[1;32m 12\u001b[0m attack_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msynthetic_inference\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 13\u001b[0m result_data\u001b[38;5;241m=\u001b[39minf_res,\n\u001b[1;32m 14\u001b[0m config\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcase\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbase\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 15\u001b[0m )\n", - "\u001b[0;31mTypeError\u001b[0m: ReportHandler.__init__() missing 1 required positional argument: 'logger'" - ] - } - ], - "source": [ - "path = \"../../leakpro_output\"\n", - "if not os.path.exists(path):\n", - " os.makedirs(path)\n", - " \n", - "from leakpro.reporting.report_handler import ReportHandler\n", - "\n", - "reporthandler = 
ReportHandler(\n", - " report_dir=path,\n", - " )\n", - "\n", - "reporthandler.save_results(\n", - " attack_name=\"synthetic_inference\",\n", - " result_data=inf_res,\n", - " config={\"case\": \"base\"}\n", - " )\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f9c3942", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/report_handler/gia_utils/cifar.py b/examples/report_handler/gia_utils/cifar.py new file mode 100644 index 00000000..98ee9490 --- /dev/null +++ b/examples/report_handler/gia_utils/cifar.py @@ -0,0 +1,26 @@ +"""Module with functions for preparing the dataset for training the target models.""" +import torchvision +from torch import as_tensor, randperm +from torch.utils.data import DataLoader, Subset, TensorDataset +from torchvision import transforms + +from leakpro.fl_utils.data_utils import get_meanstd + + +def get_cifar10_loader(num_images:int =1, batch_size:int = 1, num_workers:int = 2 ) -> TensorDataset: + """Get the full dataset for CIFAR10.""" + trainset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=transforms.ToTensor()) + data_mean, data_std = get_meanstd(trainset) + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(data_mean, data_std)]) + trainset.transform = transform + + total_examples = len(trainset) + random_indices = randperm(total_examples)[:num_images] + subset_trainset = Subset(trainset, random_indices) + trainloader = DataLoader(subset_trainset, batch_size=batch_size, + shuffle=False, drop_last=True, 
num_workers=num_workers) + data_mean = as_tensor(data_mean)[:, None, None] + data_std = as_tensor(data_std)[:, None, None] + return trainloader, data_mean, data_std diff --git a/examples/report_handler/gia_utils/model.py b/examples/report_handler/gia_utils/model.py new file mode 100644 index 00000000..403ff164 --- /dev/null +++ b/examples/report_handler/gia_utils/model.py @@ -0,0 +1,81 @@ +"""ResNet model.""" +from typing import Optional + +import torch +import torchvision +from torch import nn +from torchvision.models.resnet import BasicBlock, Bottleneck + +from leakpro.utils.import_helper import Self + + +class ResNet(torchvision.models.ResNet): + """ResNet generalization for CIFAR thingies.""" + + def __init__(self: Self, block: BasicBlock, layers: list, num_classes: int=10, zero_init_residual: bool=False, # noqa: C901 + groups: int=1, base_width: int=64, replace_stride_with_dilation: list=None, + norm_layer: Optional[nn.Module]=None, strides: list=[1, 2, 2, 2], pool: str="avg") -> None: # noqa: B006 + """Initialize as usual. Layers and strides are scriptable.""" + super(torchvision.models.ResNet, self).__init__() # nn.Module + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False, False] + if len(replace_stride_with_dilation) != 4: + raise ValueError("replace_stride_with_dilation should be None " + "or a 4-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + + self.inplanes = base_width + self.base_width = 64 # Do this to circumvent BasicBlock errors. The value is not actually used. 
+ self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + + self.layers = torch.nn.ModuleList() + width = self.inplanes + for idx, layer in enumerate(layers): + self.layers.append(self._make_layer(block, width, layer, stride=strides[idx], dilate=replace_stride_with_dilation[idx])) + width *= 2 + + self.pool = nn.AdaptiveAvgPool2d((1, 1)) if pool == "avg" else nn.AdaptiveMaxPool2d((1, 1)) + self.fc = nn.Linear(width // 2 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. + # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, torchvision.models.resnet.BasicBlock): + nn.init.constant_(m.bn2.weight, 0) + + + def _forward_impl(self: Self, x: torch.Tensor) -> None: + # See note [TorchScript super()] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + for layer in self.layers: + x = layer(x) + + x = self.pool(x) + x = torch.flatten(x, 1) + x = self.fc(x) + + return x diff --git a/examples/report_handler/mia_utils/audit.yaml b/examples/report_handler/mia_utils/audit.yaml new file mode 100644 index 00000000..073fe7f1 --- /dev/null +++ b/examples/report_handler/mia_utils/audit.yaml @@ -0,0 +1,56 @@ +audit: # Configurations for auditing + random_seed: 1234 # Integer specifying the random seed + attack_list: + # rmia: + # training_data_fraction: 0.5 # Fraction of the auxilary dataset to use for this attack 
(in each shadow model training) + # attack_data_fraction: 0.5 # Fraction of auxiliary dataset to sample from during attack + # num_shadow_models: 3 # Number of shadow models to train + # online: True # perform online or offline attack + # temperature: 2 + # gamma: 2.0 + # offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b. + # offline_b: 0.66 + # qmia: + # training_data_fraction: 1.0 # Fraction of the auxilary dataset (data without train and test indices) to use for training the quantile regressor + # epochs: 5 # Number of training epochs for quantile regression + population: + attack_data_fraction: 1.0 # Fraction of the auxilary dataset to use for this attack + # lira: + # training_data_fraction: 0.5 # Fraction of the auxilary dataset to use for this attack (in each shadow model training) + # num_shadow_models: 3 # Number of shadow models to train + # online: False # perform online or offline attack + # fixed_variance: True # Use a fixed variance for the whole audit + # boosting: True + # loss_traj: + # training_distill_data_fraction : 0.7 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2 + # number_of_traj: 10 # Number of epochs (number of points in the loss trajectory) + # label_only: False # True or False + # mia_classifier_epochs: 100 + # HSJ: + # attack_data_fraction: 0.01 # Fraction of the auxilary dataset to use for this attack + # target_metadata_path: "./target/model_metadata.pkl" + # num_iterations: 2 # Number of iterations for the optimization + # initial_num_evals: 100 # Number of evaluations for number of random vecotr to estimate the gradient + # max_num_evals: 10000 # Maximum number of evaluations + # stepsize_search: "geometric_progression" # Step size search method + # gamma: 1.0 # Gamma for the optimization + # constraint: 2 + # batch_size: 50 + # verbose: True + # epsilon_threshold: 1e-6 + + output_dir: "./leakpro_output" + attack_type: "mia" #mia, 
gia + modality: "image" #image, tabular + +target: + # Target model path + module_path: "./mia_utils/utils/cifar_model_preparation.py" + model_class: "ResNet18" + # Data paths + target_folder: "./target" + data_path: "./data/cifar10.pkl" + +shadow_model: + +distillation_model: diff --git a/examples/report_handler/mia_utils/cifar_handler.py b/examples/report_handler/mia_utils/cifar_handler.py new file mode 100644 index 00000000..97755604 --- /dev/null +++ b/examples/report_handler/mia_utils/cifar_handler.py @@ -0,0 +1,67 @@ +"""Module containing the class to handle the user input for the CIFAR100 dataset.""" + +import torch +from torch import cuda, device, optim, sigmoid +from torch.nn import CrossEntropyLoss +from torch.utils.data import DataLoader +from tqdm import tqdm + +from leakpro import AbstractInputHandler + +class CifarInputHandler(AbstractInputHandler): + """Class to handle the user input for the CIFAR100 dataset.""" + + def __init__(self, configs: dict) -> None: + super().__init__(configs = configs) + print(configs) + + + def get_criterion(self)->None: + """Set the CrossEntropyLoss for the model.""" + return CrossEntropyLoss() + + def get_optimizer(self, model:torch.nn.Module) -> None: + """Set the optimizer for the model.""" + learning_rate = 0.1 + momentum = 0.8 + return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum) + + def train( + self, + dataloader: DataLoader, + model: torch.nn.Module = None, + criterion: torch.nn.Module = None, + optimizer: optim.Optimizer = None, + epochs: int = None, + ) -> dict: + """Model training procedure.""" + + # read hyperparams for training (the parameters for the dataloader are defined in get_dataloader): + if epochs is None: + raise ValueError("epochs not found in configs") + + # prepare training + gpu_or_cpu = device("cuda" if cuda.is_available() else "cpu") + model.to(gpu_or_cpu) + + # training loop + for epoch in range(epochs): + train_loss, train_acc = 0, 0 + model.train() + for inputs, labels 
in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"): + labels = labels.long() + inputs, labels = inputs.to(gpu_or_cpu, non_blocking=True), labels.to(gpu_or_cpu, non_blocking=True) + optimizer.zero_grad() + outputs = model(inputs) + loss = criterion(outputs, labels) + pred = outputs.data.max(1, keepdim=True)[1] + loss.backward() + optimizer.step() + + # Accumulate performance of shadow model + train_acc += pred.eq(labels.data.view_as(pred)).sum() + train_loss += loss.item() + + model.to("cpu") + + return {"model": model, "metrics": {"accuracy": train_acc, "loss": train_loss}} diff --git a/examples/report_handler/mia_utils/train_config.yaml b/examples/report_handler/mia_utils/train_config.yaml new file mode 100644 index 00000000..4ea31131 --- /dev/null +++ b/examples/report_handler/mia_utils/train_config.yaml @@ -0,0 +1,17 @@ +run: # Configurations for a specific run + random_seed: 1234 # Integer number of specifying random seed + log_dir: target # String for indicating where to save all the information, including models and computed signals. We can reuse the models saved in the same log_dir. + +train: # Configuration for training + epochs: 3 # Integer number for indicating the epochs for training target model. For speedyresnet, it uses its own number of epochs. + batch_size: 128 # Integer number for indicating batch size for training the target model. For speedyresnet, it uses its own batch size. + optimizer: SGD # String which indicates the optimizer. We support Adam and SGD. For speedyresnet, it uses its own optimizer. + learning_rate: 0.01 # Float number for indicating learning rate for training the target model. For speedyresnet, it uses its own learning_rate. + momentum: 0.9 + weight_decay: 0.0 # Float number for indicating weight decay for training the target model. For speedyresnet, it uses its own weight_decay. 
+ +data: # Configuration for data + dataset: cifar10 # String indicates the name of the dataset + f_train: 0.3 # Float number from 0 to 1 indicating the fraction of the train dataset + f_test: 0.3 # Float number from 0 to 1 indicating the size of the test set + data_dir: ./data # String about where to save the data. \ No newline at end of file diff --git a/examples/report_handler/mia_utils/utils/cifar_data_preparation.py b/examples/report_handler/mia_utils/utils/cifar_data_preparation.py new file mode 100644 index 00000000..90d48987 --- /dev/null +++ b/examples/report_handler/mia_utils/utils/cifar_data_preparation.py @@ -0,0 +1,111 @@ +import os +import numpy as np +import pickle +from sklearn.model_selection import train_test_split +from torchvision import transforms +from torchvision.datasets import CIFAR10, CIFAR100 +from torch.utils.data import Dataset, Subset, DataLoader +from torch import tensor, float32, cat + + + +class CifarDataset(Dataset): + def __init__(self, x, y, transform=None, indices=None): + """ + Custom dataset for CIFAR data. + + Args: + x (torch.Tensor): Tensor of input images. + y (torch.Tensor): Tensor of labels. + transform (callable, optional): Optional transform to be applied on the image tensors. 
+ """ + self.x = x + self.y = y + self.transform = transform + self.indices = indices + + def __len__(self): + """Return the total number of samples.""" + return len(self.y) + + def __getitem__(self, idx): + """Retrieve the image and its corresponding label at index 'idx'.""" + image = self.x[idx] + label = self.y[idx] + + # Apply transformations to the image if any + if self.transform: + image = self.transform(image) + + return image, label + + @classmethod + def from_cifar(cls, config, download=True, transform=None): + + root = config["data"]["data_dir"] + # Load the CIFAR train and test datasets + if config["data"]["dataset"] == "cifar10": + trainset = CIFAR10(root=root, train=True, download=download, transform=transforms.ToTensor()) + testset = CIFAR10(root=root, train=False, download=download, transform=transforms.ToTensor()) + elif config["data"]["dataset"] == "cifar100": + trainset = CIFAR100(root=root, train=True, download=download, transform=transforms.ToTensor()) + testset = CIFAR100(root=root, train=False, download=download, transform=transforms.ToTensor()) + else: + raise ValueError("Unknown dataset type") + + # Concatenate both datasets' data and labels + data = cat([tensor(trainset.data, dtype=float32), + tensor(testset.data, dtype=float32)], + dim=0) + # Rescale data from [0, 255] to [0, 1] + data /= 255.0 + normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + data = data.permute(0, 3, 1, 2) + data = normalize(data) + + targets = cat([tensor(trainset.targets), tensor(testset.targets)], dim=0) + + return cls(data, targets) + + + def subset(self, indices): + """Return a subset of the dataset based on the given indices.""" + return CifarDataset(self.x[indices], self.y[indices], transform=self.transform) + + +def get_cifar_dataloader(data_path, train_config): + # Create the combined CIFAR-10 dataset + train_fraction = train_config["data"]["f_train"] + test_fraction = train_config["data"]["f_test"] + cifar_type = 
train_config["data"]["dataset"] + batch_size = train_config["train"]["batch_size"] + + transform = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + population_dataset = CifarDataset.from_cifar(config=train_config, download=True, transform=transform) + + file_path = "data/"+ cifar_type + ".pkl" + if not os.path.exists(file_path): + with open(file_path, "wb") as file: + pickle.dump(population_dataset, file) + print(f"Save data to {file_path}.pkl") + + dataset_size = len(population_dataset) + train_size = int(train_fraction * dataset_size) + test_size = int(test_fraction * dataset_size) + + # Use sklearn's train_test_split to split into train and test indices + selected_index = np.random.choice(np.arange(dataset_size), train_size + test_size, replace=False) + train_indices, test_indices = train_test_split(selected_index, test_size=test_size) + + train_subset = Subset(population_dataset, train_indices) + test_subset = Subset(population_dataset, test_indices) + + train_loader = DataLoader(train_subset, batch_size =batch_size, shuffle=True) + test_loader = DataLoader(test_subset, batch_size= batch_size, shuffle=False) + + return train_loader, test_loader + + + diff --git a/examples/report_handler/mia_utils/utils/cifar_model_preparation.py b/examples/report_handler/mia_utils/utils/cifar_model_preparation.py new file mode 100644 index 00000000..4021da04 --- /dev/null +++ b/examples/report_handler/mia_utils/utils/cifar_model_preparation.py @@ -0,0 +1,117 @@ +import torch.nn as nn +from torch import device, optim, cuda, no_grad, save, sigmoid +import torchvision.models as models +import pickle +from tqdm import tqdm + +class ResNet18(nn.Module): + def __init__(self, num_classes): + super(ResNet18, self).__init__() + self.model = models.resnet18(pretrained=False) + self.model.fc = nn.Linear(self.model.fc.in_features, num_classes) + self.init_params = {"num_classes": num_classes} + + def forward(self, x): + return 
self.model(x) + +def evaluate(model, loader, criterion, device): + model.eval() + loss, acc = 0, 0 + with no_grad(): + for data, target in loader: + data, target = data.to(device), target.to(device) + target = target.view(-1) + output = model(data) + loss += criterion(output, target).item() + pred = output.argmax(dim=1) + acc += pred.eq(target).sum().item() + loss /= len(loader) + acc = float(acc) / len(loader.dataset) + return loss, acc + +def create_trained_model_and_metadata(model, + train_loader, + test_loader, + train_config): + lr = train_config["train"]["learning_rate"] + momentum = train_config["train"]["momentum"] + epochs = train_config["train"]["epochs"] + + device_name = device("cuda" if cuda.is_available() else "cpu") + model.to(device_name) + model.train() + + criterion = nn.CrossEntropyLoss() + optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum) + train_losses, train_accuracies = [], [] + test_losses, test_accuracies = [], [] + + for e in tqdm(range(epochs), desc="Training Progress"): + model.train() + train_acc, train_loss = 0.0, 0.0 + + for data, target in train_loader: + data, target = data.to(device_name, non_blocking=True), target.to(device_name, non_blocking=True) + target = target.view(-1) + optimizer.zero_grad() + output = model(data) + + loss = criterion(output, target) + pred = output.argmax(dim=1) # for multi-class classification + train_acc += pred.eq(target).sum().item() + + loss.backward() + optimizer.step() + train_loss += loss.item() + + train_loss /= len(train_loader) + train_acc /= len(train_loader.dataset) + + train_losses.append(train_loss) + train_accuracies.append(train_acc) + + test_loss, test_acc = evaluate(model, test_loader, criterion, device_name) + test_losses.append(test_loss) + test_accuracies.append(test_acc) + + # Move the model back to the CPU + model.to("cpu") + with open( train_config["run"]["log_dir"]+"/target_model.pkl", "wb") as f: + save(model.state_dict(), f) + + # Create metadata and store it + 
meta_data = {} + meta_data["train_indices"] = train_loader.dataset.indices + meta_data["test_indices"] = test_loader.dataset.indices + meta_data["num_train"] = len(meta_data["train_indices"]) + + # Write init params + meta_data["init_params"] = {} + for key, value in model.init_params.items(): + meta_data["init_params"][key] = value + + # read out optimizer parameters + meta_data["optimizer"] = {} + meta_data["optimizer"]["name"] = optimizer.__class__.__name__.lower() + meta_data["optimizer"]["lr"] = optimizer.param_groups[0].get("lr", 0) + meta_data["optimizer"]["weight_decay"] = optimizer.param_groups[0].get("weight_decay", 0) + meta_data["optimizer"]["momentum"] = optimizer.param_groups[0].get("momentum", 0) + meta_data["optimizer"]["dampening"] = optimizer.param_groups[0].get("dampening", 0) + meta_data["optimizer"]["nesterov"] = optimizer.param_groups[0].get("nesterov", False) + + # read out criterion parameters + meta_data["loss"] = {} + meta_data["loss"]["name"] = criterion.__class__.__name__.lower() + + meta_data["batch_size"] = train_loader.batch_size + meta_data["epochs"] = epochs + meta_data["train_acc"] = train_acc + meta_data["test_acc"] = test_acc + meta_data["train_loss"] = train_loss + meta_data["test_loss"] = test_loss + meta_data["dataset"] = train_config["data"]["dataset"] + + with open("target/model_metadata.pkl", "wb") as f: + pickle.dump(meta_data, f) + + return train_accuracies, train_losses, test_accuracies, test_losses diff --git a/examples/report_handler/report_handler.ipynb b/examples/report_handler/report_handler.ipynb new file mode 100644 index 00000000..5e713e1d --- /dev/null +++ b/examples/report_handler/report_handler.ipynb @@ -0,0 +1,562 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "95d5acad-514e-4950-94a0-c80d789d9364", + "metadata": {}, + "source": [ + "# Report handler examples" + ] + }, + { + "cell_type": "markdown", + "id": "71f5dbe9", + "metadata": {}, + "source": [ + "Install leakpro as ``` pip install -e 
/path/to/leakpro ```" + ] + }, + { + "cell_type": "markdown", + "id": "68b48ce8", + "metadata": {}, + "source": [ + "### Synthetic examples" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "bcf529c7-8bfe-49da-9889-59111ec2cd73", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "import pandas as pd\n", + "\n", + "sys.path.append(\"../..\")\n", + "\n", + "from leakpro.synthetic_data_attacks.anomalies import return_anomalies\n", + "from leakpro.synthetic_data_attacks.inference_utils import inference_risk_evaluation\n", + "from leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", + "from leakpro.synthetic_data_attacks.singling_out_utils import singling_out_risk_evaluation\n", + "\n", + "#Get ori and syn\n", + "n_samples = 100\n", + "DATA_PATH = \"../synthetic_data/datasets/\"\n", + "ori = pd.read_csv(os.path.join(DATA_PATH, \"adults_ori.csv\"), nrows=n_samples)\n", + "syn = pd.read_csv(os.path.join(DATA_PATH, \"adults_syn.csv\"), nrows=n_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c89f3738", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=64)]: Using backend ThreadingBackend with 64 concurrent workers.\n", + "[Parallel(n_jobs=64)]: Done 2 out of 64 | elapsed: 1.2s remaining: 36.4s\n", + "[Parallel(n_jobs=64)]: Done 64 out of 64 | elapsed: 4.4s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unique predictions (array([-1, 1]), array([ 3, 97]))\n", + "Syn anom shape (3, 14)\n" + ] + } + ], + "source": [ + "syn_anom = return_anomalies(df=syn, n_estimators=1000, n_jobs=-1, verbose=True)\n", + "print(\"Syn anom shape\",syn_anom.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ad69ece9", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a singling-out result\n", + "sin_out_res = 
singling_out_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " n_attacks = syn_anom.shape[0]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7d7ffb5a", + "metadata": {}, + "outputs": [], + "source": [ + "# Create linkability result\n", + "link_res = linkability_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " n_samples = syn_anom.shape[0],\n", + " n_attacks = 100\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0a5c20e2", + "metadata": {}, + "outputs": [], + "source": [ + "# # Create base-case inference result\n", + "inf_res = inference_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " worst_case_flag = False,\n", + " n_attacks = syn_anom.shape[0]\n", + ")\n", + "\n", + "# # Create worst-case inference result\n", + "inf_res_worst = inference_risk_evaluation(\n", + " dataset = \"adults\",\n", + " ori = ori,\n", + " syn = syn_anom,\n", + " worst_case_flag = True,\n", + " n_attacks = syn_anom.shape[0]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0b3be474", + "metadata": {}, + "source": [ + "### Gradient inversion example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35aee5a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-27 08:38:00,656 INFO Inverting gradient initialized.\n", + "2024-11-27 08:38:02,796 INFO Iteration 0, loss 0.0003550234832800925\n", + "2024-11-27 08:38:02,803 INFO New best loss: 0.0003550234832800925 on round: 0\n", + "2024-11-27 08:38:02,946 INFO New best loss: 0.00033073360100388527 on round: 1\n", + "2024-11-27 08:38:03,193 INFO New best loss: 0.0003084556374233216 on round: 3\n", + "2024-11-27 08:38:03,552 INFO New best loss: 
0.0002839812950696796 on round: 6\n", + "2024-11-27 08:38:03,674 INFO New best loss: 0.00028284190921112895 on round: 7\n", + "2024-11-27 08:38:03,802 INFO New best loss: 0.00027706200489774346 on round: 8\n", + "2024-11-27 08:38:03,924 INFO New best loss: 0.00026298483135178685 on round: 9\n", + "2024-11-27 08:38:04,303 INFO New best loss: 0.00025501404888927937 on round: 12\n", + "2024-11-27 08:38:04,427 INFO New best loss: 0.00025296382955275476 on round: 13\n", + "2024-11-27 08:38:04,677 INFO New best loss: 0.00024899665731936693 on round: 15\n", + "2024-11-27 08:38:04,807 INFO New best loss: 0.0002483891148585826 on round: 16\n", + "2024-11-27 08:38:04,936 INFO New best loss: 0.0002449913590680808 on round: 17\n", + "2024-11-27 08:38:05,063 INFO New best loss: 0.0002429946616757661 on round: 18\n", + "2024-11-27 08:38:05,185 INFO New best loss: 0.00023749274259898812 on round: 19\n", + "2024-11-27 08:38:05,890 INFO New best loss: 0.0002372416784055531 on round: 25\n", + "2024-11-27 08:38:06,015 INFO New best loss: 0.00023295756545849144 on round: 26\n", + "2024-11-27 08:38:06,139 INFO New best loss: 0.00023274944396689534 on round: 27\n", + "2024-11-27 08:38:06,266 INFO New best loss: 0.00023147281899582595 on round: 28\n", + "2024-11-27 08:38:06,394 INFO New best loss: 0.00022923419601283967 on round: 29\n", + "2024-11-27 08:38:06,646 INFO New best loss: 0.00022827980865258723 on round: 31\n", + "2024-11-27 08:38:06,775 INFO New best loss: 0.00022587741841562092 on round: 32\n", + "2024-11-27 08:38:06,907 INFO New best loss: 0.00022527067631017417 on round: 33\n", + "2024-11-27 08:38:08,442 INFO New best loss: 0.00022461664048023522 on round: 45\n", + "2024-11-27 08:38:08,569 INFO New best loss: 0.00022421970788855106 on round: 46\n", + "2024-11-27 08:38:08,696 INFO New best loss: 0.0002231039834441617 on round: 47\n", + "2024-11-27 08:38:08,819 INFO New best loss: 0.00022097492183092982 on round: 48\n", + "2024-11-27 08:38:10,106 INFO New best loss: 
0.00022094276209827513 on round: 59\n", + "2024-11-27 08:38:10,216 INFO New best loss: 0.00022092672588769346 on round: 60\n", + "2024-11-27 08:38:10,328 INFO New best loss: 0.00022070117120165378 on round: 61\n", + "2024-11-27 08:38:10,434 INFO New best loss: 0.00022059425828047097 on round: 62\n", + "2024-11-27 08:38:10,539 INFO New best loss: 0.00022042241471353918 on round: 63\n", + "2024-11-27 08:38:11,368 INFO New best loss: 0.00022039355826564133 on round: 71\n", + "2024-11-27 08:38:11,472 INFO New best loss: 0.00022023300698492676 on round: 72\n", + "2024-11-27 08:38:11,680 INFO New best loss: 0.0002201125753344968 on round: 74\n", + "2024-11-27 08:38:12,092 INFO New best loss: 0.00021973479306325316 on round: 78\n", + "2024-11-27 08:38:12,195 INFO New best loss: 0.00021972827380523086 on round: 79\n", + "/opt/conda/lib/python3.10/site-packages/torchmetrics/utilities/prints.py:70: FutureWarning: Importing `peak_signal_noise_ratio` from `torchmetrics.functional` was deprecated and will be removed in 2.0. 
Import `peak_signal_noise_ratio` from `torchmetrics.image` instead.\n", + " _future_warning(\n" + ] + } + ], + "source": [ + "from gia_utils.cifar import get_cifar10_loader\n", + "from gia_utils.model import ResNet\n", + "from torchvision.models.resnet import BasicBlock\n", + "\n", + "from leakpro.attacks.gia_attacks.invertinggradients import InvertingConfig\n", + "from leakpro.fl_utils.gia_train import train\n", + "from leakpro.run import run_inverting\n", + "\n", + "model = ResNet(BasicBlock, [5, 5, 5], num_classes=10, base_width=16 * 10)\n", + "client_dataloader, data_mean, data_std = get_cifar10_loader(num_images=1, batch_size=1, num_workers=2)\n", + "\n", + "# Meta train function designed to work with GIA\n", + "train_fn = train\n", + "\n", + "# Baseline config\n", + "configs = InvertingConfig()\n", + "configs.at_iterations = 80 # Decreased from 8000 to avoid GPU memory crash\n", + "\n", + "GIA_result = run_inverting(model, client_dataloader, train_fn, data_mean, data_std, configs)" + ] + }, + { + "cell_type": "markdown", + "id": "645e8caa", + "metadata": {}, + "source": [ + "### Membership Inference Attack, CIFAR example" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d38d6aa1", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import yaml\n", + "\n", + "project_root = os.path.abspath(os.path.join(os.getcwd(), \"../../..\"))\n", + "sys.path.append(project_root)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a45a0d6b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Files already downloaded and verified\n", + "Files already downloaded and verified\n" + ] + } + ], + "source": [ + "from mia_utils.utils.cifar_data_preparation import get_cifar_dataloader\n", + "from mia_utils.utils.cifar_model_preparation import ResNet18, create_trained_model_and_metadata\n", + "\n", + "\n", + "# Load the config.yaml file\n", + "with 
open('mia_utils/train_config.yaml', 'r') as file:\n", + " train_config = yaml.safe_load(file)\n", + "\n", + "# Generate the dataset and dataloaders\n", + "path = os.path.join(os.getcwd(), train_config[\"data\"][\"data_dir\"])\n", + "\n", + "train_loader, test_loader = get_cifar_dataloader(path, train_config)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4cda80cf", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.\n", + " warnings.warn(msg)\n", + "Training Progress: 100%|██████████| 3/3 [00:13<00:00, 4.39s/it]\n" + ] + } + ], + "source": [ + "# Train the model\n", + "if not os.path.exists(\"target\"):\n", + " os.makedirs(\"target\")\n", + "if train_config[\"data\"][\"dataset\"] == \"cifar10\":\n", + " num_classes = 10\n", + "elif train_config[\"data\"][\"dataset\"] == \"cifar100\":\n", + " num_classes = 100\n", + "else:\n", + " raise ValueError(\"Invalid dataset name\")\n", + "\n", + "model = ResNet18(num_classes = num_classes)\n", + "train_acc, train_loss, test_acc, test_loss = create_trained_model_and_metadata(model, \n", + " train_loader, \n", + " test_loader, \n", + " train_config)" + ] + }, + { + "cell_type": "markdown", + "id": "0872bf51", + "metadata": {}, + "source": [ + "##### Run the MIA attack" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f28eb14f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-27 
08:38:27,995 INFO Target model blueprint created from ResNet18 in ./mia_utils/utils/cifar_model_preparation.py.\n", + "2024-11-27 08:38:27,997 INFO Loaded target model metadata from ./target/model_metadata.pkl\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'audit': {'random_seed': 1234, 'attack_list': {'population': {'attack_data_fraction': 1.0}}, 'output_dir': './leakpro_output', 'attack_type': 'mia', 'modality': 'image'}, 'target': {'module_path': './mia_utils/utils/cifar_model_preparation.py', 'model_class': 'ResNet18', 'target_folder': './target', 'data_path': './data/cifar10.pkl'}, 'shadow_model': None, 'distillation_model': None}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-27 08:38:28,210 INFO Loaded target model from ./target\n", + "2024-11-27 08:38:29,306 INFO Loaded population dataset from ./data/cifar10.pkl\n", + "2024-11-27 08:38:29,306 INFO Loaded population dataset from ./data/cifar10.pkl\n", + "2024-11-27 08:38:29,307 INFO Creating shadow model handler singleton\n", + "2024-11-27 08:38:29,308 INFO Creating distillation model handler singleton\n", + "2024-11-27 08:38:29,310 INFO Configuring the Population attack\n", + "2024-11-27 08:38:29,310 INFO Added attack: population\n", + "2024-11-27 08:38:29,311 INFO Preparing attack: population\n", + "2024-11-27 08:38:29,312 INFO Preparing attack data for training the Population attack\n", + "2024-11-27 08:38:29,316 INFO Subsampling attack data from 24000 points\n", + "2024-11-27 08:38:29,317 INFO Number of attack data points after subsampling: 24000\n", + "2024-11-27 08:38:29,318 INFO Computing signals for the Population attack\n", + "Getting loss for model 1/ 1: 100%|██████████| 750/750 [00:12<00:00, 62.27it/s]\n", + "2024-11-27 08:38:41,410 INFO Running attack: population\n", + "2024-11-27 08:38:41,415 INFO Running the Population attack on the target model\n", + "Getting loss for model 1/ 1: 100%|██████████| 1125/1125 [00:18<00:00, 
62.04it/s]\n", + "2024-11-27 08:38:59,662 INFO Attack completed\n", + "2024-11-27 08:38:59,674 INFO Finished attack: population\n", + "2024-11-27 08:38:59,675 INFO Preparing results for attack: population\n", + "2024-11-27 08:38:59,675 INFO Auditing completed\n" + ] + } + ], + "source": [ + "from mia_utils.cifar_handler import CifarInputHandler\n", + "\n", + "from leakpro import LeakPro\n", + "\n", + "# Read the config file\n", + "config_path = \"mia_utils/audit.yaml\"\n", + "\n", + "# Prepare leakpro object\n", + "leakpro = LeakPro(CifarInputHandler, config_path)\n", + "\n", + "# Run the audit \n", + "mia_results = leakpro.run_audit(return_results=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "373dcc8a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-27 08:38:59,693 INFO Initializing report handler...\n", + "2024-11-27 08:38:59,693 INFO report_dir set to: ../../leakpro_output/results\n", + "2024-11-27 08:38:59,694 INFO Saving results for singling_out\n", + "2024-11-27 08:39:01,779 INFO Saving results for linkability_risk\n", + "2024-11-27 08:39:03,748 INFO Saving results for inference_risk_base\n", + "2024-11-27 08:39:08,623 INFO Saving results for inference_risk_worst\n", + "2024-11-27 08:39:10,925 INFO Saving results for gia\n", + "2024-11-27 08:39:10,939 INFO Saving results for population\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../..\")\n", + "\n", + "# Import and initialize ReportHandler\n", + "from leakpro.reporting.report_handler import ReportHandler\n", + "report_handler = ReportHandler()\n", + "\n", + "# # Save Synthetic results using the ReportHandler\n", + "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)\n", + "report_handler.save_results(attack_name=\"linkability_risk\", result_data=link_res)\n", + "report_handler.save_results(attack_name=\"inference_risk_base\", result_data=inf_res)\n", + "report_handler.save_results(attack_name=\"inference_risk_worst\", result_data=inf_res_worst)\n", + "\n", + "# # Save GIA results using report handler\n", + "report_handler.save_results(attack_name=\"gia\", result_data=GIA_result)\n", + "\n", + "# Save MIA resuls using report handler\n", + "for res in mia_results:\n", + " report_handler.save_results(attack_name=res.attack_name, result_data=res, config=res.configs)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1d91c7e0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-11-27 08:40:04,818 INFO PDF compiled\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Use the ReportHandler and load all the saved results\n", + "report_handler.load_results()\n", + "\n", + "# Create results and collect corresponding latex texts\n", + "report_handler.create_results_all()\n", + "\n", + "# Create the report by compiling the latex text\n", + "report_handler.create_report()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/report_handler/report_handler_anomalies.ipynb b/examples/report_handler/report_handler_anomalies.ipynb deleted file mode 100644 index a32fbdda..00000000 --- a/examples/report_handler/report_handler_anomalies.ipynb +++ /dev/null @@ -1,203 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "95d5acad-514e-4950-94a0-c80d789d9364", - "metadata": {}, - "source": [ - "# Report handler examples" - ] - }, - { - "cell_type": "markdown", - "id": "71f5dbe9", - "metadata": {}, - "source": [ - "Install leakpro as ``` pip install -e /path/to/leakpro ```" - ] - }, - { - "cell_type": "markdown", - "id": "68b48ce8", - "metadata": {}, - "source": [ - "### Synthetic examples" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "bcf529c7-8bfe-49da-9889-59111ec2cd73", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "\n", - "import pandas as pd\n", - "\n", - "sys.path.append(\"../..\")\n", - "\n", - "from leakpro.synthetic_data_attacks import plots\n", - "from leakpro.synthetic_data_attacks.anomalies import return_anomalies\n", - "from leakpro.synthetic_data_attacks.inference_utils import 
inference_risk_evaluation\n", - "from leakpro.synthetic_data_attacks.linkability_utils import linkability_risk_evaluation\n", - "from leakpro.synthetic_data_attacks.singling_out_utils import singling_out_risk_evaluation\n", - "# from leakpro.metrics.attack_result import SyntheticResult\n", - "\n", - "#Get ori and syn\n", - "n_samples = 100\n", - "DATA_PATH = \"../synthetic_data/datasets/\"\n", - "ori = pd.read_csv(os.path.join(DATA_PATH, \"adults_ori.csv\"), nrows=n_samples)\n", - "syn = pd.read_csv(os.path.join(DATA_PATH, \"adults_syn.csv\"), nrows=n_samples)" - ] - }, - { - "cell_type": "markdown", - "id": "62c44504-a5fa-4846-8132-53877f369825", - "metadata": {}, - "source": [ - "### Get anomalies of synthetic data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad69ece9", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a result\n", - "\n", - "syn_anom = return_anomalies(df=syn, n_estimators=1000, n_jobs=-1, verbose=True)\n", - "print(\"Syn anom shape\",syn_anom.shape)\n", - "\n", - "sin_out_res = singling_out_risk_evaluation(\n", - " dataset = \"adults\",\n", - " ori = ori,\n", - " syn = syn_anom,\n", - " n_attacks = syn_anom.shape[0]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "373dcc8a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-11-20 00:09:42,566 INFO Initializing report handler...\n", - "2024-11-20 00:09:42,567 INFO report_dir set to: ../../leakpro_output/results\n", - "2024-11-20 00:09:42,569 INFO Saving results for singling_out\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "../../leakpro_output/results\n" - ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Import and initialize ReportHandler\n", - "from leakpro.reporting.report_handler import ReportHandler\n", - "report_handler = ReportHandler()\n", - "\n", - "# Save the result using the ReportHandler\n", - "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d91c7e0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-11-19 23:38:04,520 INFO No results of type GIAResults found.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "../../leakpro_output/results\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-11-19 23:38:14,966 INFO PDF compiled\n" - ] - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Use the ReportHandler and load all the saved results\n", - "report_handler.load_results()\n", - "\n", - "# Create results and collect corresponding latex texts\n", - "report_handler.create_results_all()\n", - "\n", - "# Create the report by compiling the latex text\n", - "report_handler.create_report()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/leakpro/attacks/gia_attacks/invertinggradients.py b/leakpro/attacks/gia_attacks/invertinggradients.py index 4e2fc369..4b583e7d 100755 --- a/leakpro/attacks/gia_attacks/invertinggradients.py +++ b/leakpro/attacks/gia_attacks/invertinggradients.py @@ -44,6 +44,7 @@ def __init__(self: Self, model: Module, client_loader: DataLoader, train_fn: Cal self.train_fn = train_fn self.data_mean = data_mean self.data_std = data_std + self.configs = configs self.t_v_scale = configs.total_variation self.attack_lr = configs.attack_lr self.iterations = configs.at_iterations @@ -123,7 +124,8 @@ def run_attack(self:Self) -> GIAResults: pass return GIAResults(self.client_loader, self.best_reconstruction, - dataloaders_psnr(self.client_loader, self.reconstruction_loader), self.data_mean, self.data_std) + dataloaders_psnr(self.client_loader, self.reconstruction_loader), self.data_mean, self.data_std, + self.configs) def gradient_closure(self: Self, optimizer: torch.optim.Optimizer) -> Callable: diff --git a/leakpro/attacks/mia_attacks/attack_p.py b/leakpro/attacks/mia_attacks/attack_p.py index 7ff4d3c9..44509546 100755 --- a/leakpro/attacks/mia_attacks/attack_p.py +++ 
b/leakpro/attacks/mia_attacks/attack_p.py @@ -86,7 +86,7 @@ def prepare_attack(self:Self) -> None: logger.info("Computing signals for the Population attack") self.attack_signal = np.array(self.signal([self.target_model], self.handler, attack_data_indices)).squeeze() - def run_attack(self:Self) -> CombinedMetricResult: + def run_attack(self:Self) -> MIAResult: """Run the attack on the target model and dataset. Args: diff --git a/leakpro/attacks/mia_attacks/lira.py b/leakpro/attacks/mia_attacks/lira.py index c366302e..d52a347b 100755 --- a/leakpro/attacks/mia_attacks/lira.py +++ b/leakpro/attacks/mia_attacks/lira.py @@ -8,7 +8,7 @@ from leakpro.attacks.utils.boosting import Memorization from leakpro.attacks.utils.shadow_model_handler import ShadowModelHandler from leakpro.input_handler.abstract_input_handler import AbstractInputHandler -from leakpro.metrics.attack_result import CombinedMetricResult, MIAResult +from leakpro.metrics.attack_result import MIAResult from leakpro.signals.signal import ModelRescaledLogits from leakpro.utils.import_helper import Self from leakpro.utils.logger import logger @@ -159,12 +159,12 @@ def prepare_attack(self:Self)->None: logger.info(f"Some shadow model(s) contains {count_in_samples} IN samples in total for the model(s)") logger.info("This is not an offline attack!") - self.logger.info(f"Calculating the logits for all {self.num_shadow_models} shadow models") + logger.info(f"Calculating the logits for all {self.num_shadow_models} shadow models") self.shadow_models_logits = np.swapaxes(self.signal(self.shadow_models, self.handler, self.audit_dataset["data"], self.eval_batch_size), 0, 1) # Calculate logits for the target model - self.logger.info("Calculating the logits for the target model") + logger.info("Calculating the logits for the target model") self.target_logits = np.swapaxes(self.signal([self.target_model], self.handler, self.audit_dataset["data"], self.eval_batch_size), 0, 1).squeeze() @@ -246,7 +246,7 @@ def 
_individual_carlini(self:Self, logits: list, mask: list, is_in: bool) -> np. return self.fixed_in_std return self.fixed_out_std - def run_attack(self:Self) -> CombinedMetricResult: + def run_attack(self:Self) -> MIAResult: """Runs the attack on the target model and dataset and assess privacy risks or data leakage. This method evaluates how the target model's output (logits) for a specific dataset diff --git a/leakpro/attacks/mia_attacks/loss_trajectory.py b/leakpro/attacks/mia_attacks/loss_trajectory.py index 072b5aa9..a51c2390 100755 --- a/leakpro/attacks/mia_attacks/loss_trajectory.py +++ b/leakpro/attacks/mia_attacks/loss_trajectory.py @@ -423,4 +423,4 @@ def run_attack(self:Self) -> MIAResult: true_labels=true_labels, predictions_proba=None, signal_values=signals, - ) \ No newline at end of file + ) diff --git a/leakpro/leakpro.py b/leakpro/leakpro.py index 9b24d506..7b060809 100755 --- a/leakpro/leakpro.py +++ b/leakpro/leakpro.py @@ -32,7 +32,6 @@ setup, ) from leakpro.input_handler.modality_extensions.tabular_extension import TabularExtension -from leakpro.reporting.utils import prepare_privacy_risk_report from leakpro.utils.import_helper import Self from leakpro.utils.logger import add_file_handler, logger @@ -126,17 +125,25 @@ def setup_handler(self:Self, handler_class:AbstractInputHandler, configs:dict) - return handler - def run_audit(self:Self) -> None: + def run_audit(self:Self, return_results: bool = False) -> None: """Run the audit.""" audit_results = self.attack_scheduler.run_attacks() + results = [] if return_results else None for attack_name in audit_results: logger.info(f"Preparing results for attack: {attack_name}") - prepare_privacy_risk_report( - audit_results[attack_name]["result_object"], - self.handler.configs["audit"], - save_path=f"{self.report_dir}/{attack_name}", - ) + if return_results: + + result = audit_results[attack_name]["result_object"] + result.attack_name = attack_name + result.configs = self.handler.configs["audit"] + + # 
Append + results.append(result) + else: + result = audit_results[attack_name]["result_object"] + result.save(name=attack_name, path=self.report_dir, config=self.handler.configs["audit"]) logger.info("Auditing completed") + return results diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index 5bb72131..cf5a7cd6 100755 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -105,7 +105,6 @@ def __init__( # noqa: PLR0913 threshold: Threshold computed by the metric. """ - # TODO REDIFINE THE CLASS SO IT DOSE NOT STORE MATRICIES BUT VECTORS self.predicted_labels = predicted_labels self.true_labels = true_labels @@ -194,7 +193,6 @@ def __init__( # noqa: PLR0913 metadata: dict = None, resultname: str = None, id: str = None, - load: bool = False, )-> None: """Compute and store the accuracy, ROC AUC score, and the confusion matrix for a metric. @@ -205,7 +203,7 @@ def __init__( # noqa: PLR0913 predictions_proba: Continuous version of the predicted_labels. signal_values: Values of the signal used by the metric. threshold: Threshold computed by the metric. 
- audit_indices: The connesponding dataset indices for the results + audit_indices: The corresponding dataset indices for the results id: The identity of the attack load: If the data should be loaded metadata: Metadata about the results @@ -223,7 +221,9 @@ def __init__( # noqa: PLR0913 self.resultname = resultname self.id = id - if load: + if true_labels is None or predicted_labels is None: + self.tn, self.tp, self.fn, self.fp = 0.0, 0.0, 0.0, 0.0 + self.fpr, self.tpr, self.roc_auc = 0.0, 0.0, 0.0 return self.tn = np.sum(true_labels == 0) - np.sum( @@ -235,8 +235,13 @@ def __init__( # noqa: PLR0913 predicted_labels[:, true_labels == 1], axis=1 ) - self.fpr = self.fp / (self.fp + self.tn) - self.tpr = self.tp / (self.tp + self.fn) + self.fpr = np.divide(self.fp.astype(float), (self.fp + self.tn).astype(float), + out=np.zeros_like(self.fp, dtype=float), + where=(self.fp + self.tn) != 0.0) + self.tpr = np.divide(self.tp.astype(float), (self.tp + self.fn).astype(float), + out=np.zeros_like(self.tp, dtype=float), + where=(self.tp + self.fn) != 0.0) + self.roc_auc = auc(self.fpr, self.tpr) @@ -244,7 +249,7 @@ def __init__( # noqa: PLR0913 def load(data: dict) -> None: """Load the MIAResults to disk.""" - miaresult = MIAResult(load=True) + miaresult = MIAResult() miaresult.resultname = data["resultname"] miaresult.resulttype = data["resulttype"] @@ -283,9 +288,9 @@ def save(self:Self, path: str, name: str, config:dict = None, show_plot:bool = F "roc_auc": self.roc_auc, "config": config, "fixed_fpr": fixed_fpr_table, - "audit_indices": self.audit_indices.tolist(), - "signal_values": self.signal_values.tolist(), - "true_labels": self.true_labels.tolist(), + "audit_indices": self.audit_indices.tolist() if self.audit_indices is not None else None, + "signal_values": self.signal_values.tolist() if self.signal_values is not None else None, + "true_labels": self.true_labels.tolist() if self.true_labels is not None else None, "threshold": self.threshold.tolist() if self.threshold 
is not None else None, "id": name, } @@ -299,19 +304,19 @@ def save(self:Self, path: str, name: str, config:dict = None, show_plot:bool = F json.dump(data, f) # Create ROC plot for MIAResult - filename = f"{save_path}/ROC" - temp_res = MIAResult(load=True) + temp_res = MIAResult() temp_res.tpr = self.tpr temp_res.fpr = self.fpr temp_res.id = self.id self.create_plot(results = [temp_res], - filename = filename, + save_dir = save_path, + save_name = "ROC", show_plot = show_plot ) # Create SignalHistogram plot for MIAResult - filename = f"{save_path}/SignalHistogram.png" - self.create_signal_histogram(filename = filename, + self.create_signal_histogram(save_path = save_path, + save_name = "SignalHistogram", signal_values = self.signal_values, true_labels = self.true_labels, threshold = self.threshold, @@ -324,7 +329,9 @@ def get_strongest(results: list) -> list: return max((res for res in results), key=lambda d: d.roc_auc) def create_signal_histogram( - self:Self, filename: str, + self:Self, + save_path: str, + save_name: str, signal_values: list, true_labels: list, threshold: float, @@ -332,6 +339,7 @@ def create_signal_histogram( ) -> None: """Method to create Signal Histogram.""" + filename = f"{save_path}/{save_name}" values = np.array(signal_values).ravel() labels = np.array(true_labels).ravel() @@ -368,7 +376,7 @@ def create_signal_histogram( plt.xlabel("Signal value") plt.ylabel("Number of samples") plt.title("Signal histogram") - plt.savefig(fname=filename, dpi=1000) + plt.savefig(fname=filename+".png", dpi=1000) if show_plot: plt.show() else: @@ -437,7 +445,7 @@ def _get_results_of_name( def create_results( results: list, save_dir: str = "./", - save_name: str = "foo", + save_name: str = "foo", # noqa: ARG004 show_plot: bool = False, ) -> str: """Result method for MIAResult.""" @@ -508,19 +516,17 @@ def config_latex_style(config: str) -> str: """ return latex_content - - class GIAResults: """Contains results for a GIA attack.""" def __init__( self: Self, - 
original_data: DataLoader, - recreated_data: DataLoader, - psnr_score: float, - data_mean: float, - data_std: float, - load: bool + original_data: DataLoader = None, + recreated_data: DataLoader = None, + psnr_score: float = None, + data_mean: float = None, + data_std: float = None, + config: dict = None, ) -> None: self.original_data = original_data @@ -528,36 +534,55 @@ def __init__( self.PSNR_score = psnr_score self.data_mean = data_mean self.data_std = data_std + self.config = config - if load: - return - + @staticmethod def load( - self:Self, - data: dict + data:dict ) -> None: """Load the GIAResults from disk.""" - self.original = data["original"] - self.resulttype = data["resulttype"] - self.recreated = data["recreated"] - self.id = data["id"] + giaresult = GIAResults() + + giaresult.original = data["original"] + giaresult.resulttype = data["resulttype"] + giaresult.recreated = data["recreated"] + giaresult.id = data["id"] + giaresult.result_config = data["result_config"] + + return giaresult def save( self: Self, - save_path: str, name: str, - config: dict, - show_plot: bool = False + path: str, + config: dict, # noqa: ARG002 + show_plot: bool = False # noqa: ARG002 ) -> None: """Save the GIAResults to disk.""" - result_config = config["attack_list"][name] + def get_gia_config(instance: Any, skip_keys: List[str] = None) -> dict: + """Extract manually typed variables and their values from a class instance with options to skip keys.""" + if skip_keys is None: + skip_keys = [] + + cls_annotations = instance.__class__.__annotations__ # Get typed attributes + return { + var: getattr(instance, var) + for var in cls_annotations + if var not in skip_keys # Exclude skipped keys + } + + result_config = get_gia_config(self.config, skip_keys=["optimizer", "criterion"]) # Get the name for the attack configuration config_name = get_config_name(result_config) self.id = f"{name}{config_name}" - save_path = f"{save_path}/{name}/{self.id}" + path = 
f"{path}/gradient_inversion/{self.id}" + + # Check if path exists, otherwise create it. + if not os.path.exists(f"{path}"): + os.makedirs(f"{path}") def extract_tensors_from_subset(dataset: Dataset) -> Tensor: all_tensors = [] @@ -574,46 +599,34 @@ def extract_tensors_from_subset(dataset: Dataset) -> Tensor: original_data = extract_tensors_from_subset(self.original_data.dataset) output_denormalized = clamp(recreated_data * self.data_std + self.data_mean, 0, 1) - recreated = os.path.join(save_path, "recreated_image.png") + recreated = os.path.join(path, "recreated_image.png") save_image(output_denormalized, recreated) gt_denormalized = clamp(original_data * self.data_std + self.data_mean, 0, 1) - original = os.path.join(save_path, "original_image.png") + original = os.path.join(path, "original_image.png") save_image(gt_denormalized, original) - if show_plot: - # Plot output - plt.plot(output_denormalized) - plt.show() - - # Plot ground truth - plt.plot(gt_denormalized) - plt.show() - # Data to be saved data = { "resulttype": self.__class__.__name__, "original": original, "recreated": recreated, + "result_config": result_config, "id": self.id, } - # Check if path exists, otherwise create it. 
- if not os.path.exists(f"{save_path}"): - os.makedirs(f"{save_path}") - # Save the results to a file - with open(f"{save_path}/data.json", "w") as f: + with open(f"{path}/data.json", "w") as f: json.dump(data, f) @staticmethod def create_results( results: list, - save_dir: str = "./", - save_name: str = "foo", + save_dir: str = "./", # noqa: ARG004 + save_name: str = "foo", # noqa: ARG004 ) -> str: """Result method for GIA.""" - + latex = "" def _latex( save_name: str, original: str, @@ -623,346 +636,19 @@ def _latex( return f""" \\subsection{{{" ".join(save_name.split("_"))}}} \\begin{{figure}}[ht] - \\includegraphics[width=0.8\\textwidth]{{{original}}} + \\includegraphics[width=0.6\\textwidth]{{{original}}} \\caption{{Original}} \\end{{figure}} \\begin{{figure}}[ht] - \\includegraphics[width=0.8\\textwidth]{{{recreated}}} - \\caption{{Original}} + \\includegraphics[width=0.6\\textwidth]{{{recreated}}} + \\caption{{Recreated}} \\end{{figure}} """ - return _latex(save_name=save_name, original=save_dir+"recreated_image.png", recreated=save_dir+"original_image.png") - -# class SyntheticResult: -# """Contains results for SyntheticResult.""" - -# def __init__( # noqa: PLR0913 -# self:Self, -# SynRes: Union[SinglingOutResults, LinkabilityResults, InferenceResults], -# load: bool = False, -# ) -> None: -# """Initalze SyntheticResult method.""" - -# # Initialize values to result object -# self.SynRes = SynRes - -# # Have a method to return if the results are to be loaded -# if load: -# return - -# # Create some result -# self.result_values = self.create_result(self.values) - -# def load( -# self:Self, -# data: dict -# ) -> None: -# """Load the SyntheticResult class to disk.""" -# self.result_values = data["some_result"] - -# def save( -# self:Self, -# save_path: str, -# save_name: str, -# config:dict = None -# ) -> None: -# """Save the SyntheticResult class to disk.""" - -# result_config = config["attack_list"][name] - -# # Get the name for the attack configuration -# 
config_name = get_config_name(result_config) -# self.id = f"{name}{config_name}" -# save_path = f"{path}/{name}/{self.id}" - -# save_name = os.path.join(save_path, f"synthetic.png") - -# SyntheticResult.plot( -# res=self.SynRes, -# show=False, -# save=True, -# save_path=save_path, -# save_name=save_name, -# ) - -# # Data to be saved -# data = { -# "synthetic_result_name": self.SynRes.__class__.__name__, -# "synthetic_result": self.SynRes, -# "image_path": save_name, -# "id": self.id -# } - -# # Check if path exists, otherwise create it. -# if not os.path.exists(f"{save_path}"): -# os.makedirs(f"{save_path}") - -# # Save the results to a file -# with open(f"{save_path}/data.json", "w") as f: -# json.dump(data, f) - -# @staticmethod -# def create_results( -# results: list, -# save_dir: str = "./", -# save_name: str = "foo", -# ) -> str: -# """Result method for SyntheticResult.""" - -# def _latex(save_name: str, result_file: str) -> str: -# """Latex method for SyntheticResult.""" -# return f""" -# \\subsection{{{" ".join(save_name.split("_"))}}} -# \\begin{{figure}}[ht] -# \\includegraphics[width=0.8\\textwidth]{{{result_file}}} -# \\caption{{Original}} -# \\end{{figure}} -# """ -# return _latex(results=results, save_name=save_name, result_file=save_dir+"synthetic.png") - -# @staticmethod -# def plot( -# res: Union[SinglingOutResults, LinkabilityResults, InferenceResults], -# high_res_flag: bool = True, -# case_flag: str = "base", -# show:bool = True, -# save:bool = False, -# save_path:str = "./", -# save_name:str = "fig.png", -# ) -> None: - -# save_name = os.path.join(save_path, save_name) - -# SyntheticResultName = res.__class__.__name__ -# if SyntheticResultName == "SinglingOutResults": -# SyntheticResult.plot_singling_out(sin_out_res=res, -# high_res_flag = high_res_flag, -# show=show, -# save=save, -# save_name=save_name) - -# elif SyntheticResultName == "LinkabilityResults": -# SyntheticResult.plot_linkability(link_res=res, -# high_res_flag = high_res_flag, -# 
show=show, -# save=save, -# save_name=save_name) - -# elif SyntheticResultName == "InferenceResults": -# if case_flag == "base": -# SyntheticResult.plot_ir_base_case(inf_res=res, -# high_res_flag = high_res_flag, -# show=show, -# save=save, -# save_name=save_name) -# elif case_flag == "worst": -# SyntheticResult.plot_ir_worst_case(inf_res=res, -# high_res_flag = high_res_flag, -# show=show, -# save=save, -# save_name=save_name) -# else: -# print("No such case") - -# def plot_ir_base_case( -# *, -# inf_res: InferenceResults, -# high_res_flag: bool = True, -# show: bool = True, -# save: bool = False, -# save_name: str = None, -# ) -> None: -# """Function to plot inference results base case given results. - -# Note: function is not tested and is used in examples. -# """ -# #Set res, secrets, set_secrets and set_nr_aux_cols -# res = np.array(inf_res.res) -# secrets = np.array(inf_res.secrets) -# set_secrets = sorted(set(secrets)) -# set_nr_aux_cols = np.unique(res[:,-1].astype(int)) -# # High res flag -# if high_res_flag: -# plot_save_high_res() -# # Set up the figure and get axes -# fig_title = f"Inference risk, base case scenario, {conf_level} confidence, total attacks: {int(res[:,0].sum())}" -# axs = get_figure_axes(two_axes_flag=True, fig_title=fig_title) -# # Set plot variables -# titles = ["Risk per column", "Risk per Nr aux cols"] -# xlabels = ["Secret col", "Nr aux cols"] -# sets_values = [set_secrets, set_nr_aux_cols] -# valueses = [secrets, res[:,-1]] -# assert len(axs) == len(titles) -# assert len(axs) == len(xlabels) -# assert len(axs) == len(sets_values) -# assert len(axs) == len(valueses) -# #Plotting -# for ax, title, xlabel, set_values, values in zip(axs, titles, xlabels, sets_values, valueses): -# set_labels_and_title( -# ax = ax, -# xlabel = xlabel, -# ylabel = "Risk", -# title = title -# ) -# # Iterate through values and plot bar charts -# iterate_values_plot_bar_charts(ax=ax, res=res, set_values=set_values, values=values) -# # Adding ticks -# 
set_ticks(ax=ax, xlabels=set_values) -# # Adding legend -# set_legend(ax=ax) -# # Save plot -# if save: -# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") -# # Show plot -# if show: -# plt.show() -# else: -# plt.clf() - -# def plot_ir_worst_case( -# *, -# inf_res: InferenceResults, -# high_res_flag: bool = True, -# show: bool = True, -# save: bool = False, -# save_name: str = None, -# ) -> None: -# """Function to plot inference results worst case given results. - -# Note: function is not tested and is used in examples. -# """ -# #Set res, secrets and set_secrets -# res = np.array(inf_res.res) -# secrets = np.array(inf_res.secrets) -# set_secrets = sorted(set(secrets)) -# # High res flag -# if high_res_flag: -# plot_save_high_res() -# # Set up the figure and get axes -# ax = get_figure_axes() -# # Iterate through secrets and plot bar charts -# iterate_values_plot_bar_charts( -# ax = ax, -# res = res, -# set_values = set_secrets, -# values = secrets, -# max_value_flag = True -# ) -# # Adding labels and title -# set_labels_and_title( -# ax = ax, -# xlabel = "Secret col", -# ylabel = "Risk", -# title = f"Inference risk, worst case scenario, total attacks: {int(res[:,0].sum())}" -# ) -# # Adding ticks -# set_ticks(ax=ax, xlabels=set_secrets) -# # Adding legend -# set_legend(ax=ax) -# # Save plot -# if save: -# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") -# # Show plot -# if show: -# plt.show() -# else: -# plt.clf() - -# @staticmethod -# def plot_linkability( -# *, -# link_res:LinkabilityResults, -# high_res_flag: bool = False, -# show: bool = True, -# save: bool = False, -# save_name: str = None, -# ) -> None: -# """Function to plot linkability results from given res. - -# Note: function is not tested and is used in examples. 
-# """ -# # Get res and aux_cols_nr -# res = np.array(link_res.res) -# set_nr_aux_cols = np.unique(res[:,-1].astype(int)) -# # High res flag -# if high_res_flag: -# plot_save_high_res() -# # Set up the figure and get axes -# ax = get_figure_axes() -# # Iterate through nr of columns and plot bar charts -# iterate_values_plot_bar_charts(ax=ax, res=res, set_values=set_nr_aux_cols, values=res[:, -1]) -# # Adding labels and title -# set_labels_and_title( -# ax = ax, -# xlabel = "Nr aux cols", -# ylabel = "Risk", -# title = f"Linkability risk {conf_level} confidence, total attacks: {int(res[:,0].sum())}" -# ) -# # Adding ticks -# set_ticks(ax=ax, xlabels=set_nr_aux_cols) -# # Adding legend -# set_legend(ax=ax) -# # Save plot -# if save: -# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") -# # Show plot -# if show: -# plt.show() -# else: -# plt.clf() - -# @staticmethod -# def plot_singling_out( -# *, -# sin_out_res: SinglingOutResults, -# high_res_flag: bool = True, -# show: bool = True, -# save: bool = False, -# save_name: str = None, -# ) -> None: -# """Function to plot singling out given results. - -# Note: function is not tested and is used in examples. 
-# """ -# #Set res, n_cols and set_n_cols -# res = np.array(sin_out_res.res) -# n_cols = res[:,-1].astype(int).tolist() -# set_n_cols = np.unique(n_cols) -# # High res flag -# if high_res_flag: -# plot_save_high_res() -# # Set up the figure and get axes -# ax = get_figure_axes() -# # Iterate through values and plot bar charts -# iterate_values_plot_bar_charts( -# ax = ax, -# res = res, -# set_values = set_n_cols, -# values = n_cols, -# max_value_flag = True -# ) -# # Adding labels and title -# fig_title = f"Singling out risk total attacks: {int(res[:,0].sum())}" -# if res.shape[0]==1: -# fig_title += f", n_cols={int(res[0,-1])}" -# set_labels_and_title( -# ax = ax, -# xlabel = "n_cols for predicates", -# ylabel = "Risk", -# title = fig_title -# ) -# # Adding ticks -# set_ticks(ax=ax, xlabels=set_n_cols) -# # Adding legend -# if save: -# plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") -# # Show plot -# if show: -# plt.show() -# else: -# plt.clf() - + unique_names = reduce_to_unique_labels(results) + for res, name in zip(results, unique_names): + latex += _latex(save_name=name, original=res.original, recreated=res.recreated) + return latex class TEMPLATEResult: @@ -970,27 +656,24 @@ class TEMPLATEResult: def __init__( # noqa: PLR0913 self:Self, - values: list, - load: bool = False, + values: list = None, ) -> None: - """Initalze Result method.""" + """Initialize the result method.""" # Initialize values to result object self.values = values - # Have a method to return if the results are to be loaded - if load: - return - - # Create some result + # Create some latex result self.result_values = self.create_result(self.values) + @staticmethod def load( - self:Self, data: dict ) -> None: """Load the TEMPLATEResult class to disk.""" - self.result_values = data["some_result"] + template_res = TEMPLATEResult() + template_res.values = data["some_values"] + return template_res def save( self:Self, @@ -1000,15 +683,13 @@ def save( ) -> None: """Save the 
TEMPLATEResult class to disk.""" - result_config = config["attack_list"][name] - # Data to be saved data = { - "some_result": self.result_values + "some_values": self.values } # Get the name for the attack configuration - config_name = get_config_name(result_config) + config_name = get_config_name(config) self.id = f"{name}{config_name}" # Check if path exists, otherwise create it. diff --git a/leakpro/reporting/report_handler.py b/leakpro/reporting/report_handler.py index ec6e3621..2b9f39e7 100644 --- a/leakpro/reporting/report_handler.py +++ b/leakpro/reporting/report_handler.py @@ -6,6 +6,8 @@ import subprocess from leakpro.metrics.attack_result import GIAResults, MIAResult +from leakpro.synthetic_data_attacks.inference_utils import InferenceResults +from leakpro.synthetic_data_attacks.linkability_utils import LinkabilityResults from leakpro.synthetic_data_attacks.singling_out_utils import SinglingOutResults from leakpro.utils.import_helper import Self, Union from leakpro.utils.logger import setup_logger @@ -26,13 +28,15 @@ def __init__(self:Self, report_dir: str = None, logger:logging.Logger = None) -> self.leakpro_types = ["MIAResult", "GIAResults", "SinglingOutResults", + "InferenceResults", + "LinkabilityResults" ] # Initiate empty lists for the different types of LeakPro attack types for key in self.leakpro_types: self.pdf_results[key] = [] - def _try_find_rep_dir(self): + def _try_find_rep_dir(self:Self) -> str: save_path = "../leakpro_output/results" # Check if path exists, otherwise create it. 
for _ in range(3): @@ -44,10 +48,17 @@ def _try_find_rep_dir(self): if not os.path.exists(save_path): save_path = "../../leakpro_output/results" os.makedirs(save_path) - return save_path + return save_path - def save_results(self:Self, attack_name: str = None, result_data: Union[MIAResult, GIAResults, SinglingOutResults] = None, config: dict = None) -> None: + def save_results(self:Self, + attack_name: str = None, + result_data: Union[MIAResult, + GIAResults, + InferenceResults, + LinkabilityResults, + SinglingOutResults] = None, + config: dict = None) -> None: """Save method for results.""" self.logger.info(f"Saving results for {attack_name}") @@ -69,62 +80,51 @@ def load_results(self:Self) -> None: resulttype = data["resulttype"] # Dynamically get the class from its name (resulttype) - # This assumes that the class is already defined in the current module or imported + # This assumes that the class is already defined in the current module or imported to context if resulttype in globals() and callable(globals()[resulttype]): cls = globals()[resulttype] else: raise ValueError(f"Class '{resulttype}' not found.") - # Initialize the class using the saved primitives - # instance = cls(load=True) - data["id"] = subdir.name instance = cls.load(data) - - # if instance.id is None: - # instance.id = subdir.name - - # if instance.resultname is None: - # instance.resultname = parentdir.name - self.results.append(instance) except Exception as e: - self.logger.info(f"Not able to load data, Error: {e}") + self.logger.info(f"In ReportHandler.load_results(), Not able to load data, Error: {e}") def create_results( self:Self, - types: list = [], + types: list = None, ) -> None: """Result method to group all attacks.""" for result_type in types: - # try: - # Get all results of type "Result" - # results = [res for res in self.results if res.resulttype == result_type] - results = [res for res in self.results if res.__class__.__name__ == result_type] + try: + # Get all results of type 
result_type + results = [res for res in self.results if res.__class__.__name__ == result_type] - # If no results of type "result_type" is found, skip to next result_type - if not results: - self.logger.info(f"No results of type {result_type} found.") - continue + # If no results of type "result_type" is found, skip to next result_type + if not results: + self.logger.info(f"No results of type {result_type} found.") + continue - # Check if the result type has a 'create_results' method - try: - result_class = globals().get(result_type) - except Exception as e: - self.logger.info(f"No {result_type} class could be found or exists. Error: {e}") - continue + # Check if the result type has a 'create_results' method + try: + result_class = globals().get(result_type) + except Exception as e: + self.logger.info(f"No {result_type} class could be found or exists. Error: {e}") + continue - if hasattr(result_class, "create_results") and callable(result_class.create_results): + if hasattr(result_class, "create_results") and callable(result_class.create_results): - # Create all results - latex_results = result_class.create_results(results=results, - save_dir=self.report_dir, - ) - self.pdf_results[result_type].append(latex_results) + # Create all results + latex_results = result_class.create_results(results=results, + save_dir=self.report_dir, + ) + self.pdf_results[result_type].append(latex_results) - # except Exception as e: - # self.logger.info(f"Error in results all: {result_class}, {e}") + except Exception as e: + self.logger.info(f"Error in results all: {result_class}, {e}") def create_results_all( self:Self, @@ -148,7 +148,8 @@ def create_results_syn( self:Self, ) -> None: """Method to create Synthetic results.""" - self.create_results(types=["SinglingOutResults", + + self.create_results(types=["SinglingOutResults", "InferenceResults", "LinkabilityResults"]) diff --git a/leakpro/run.py b/leakpro/run.py index d8f97da4..80d9551b 100644 --- a/leakpro/run.py +++ b/leakpro/run.py 
@@ -15,7 +15,8 @@ def run_inverting(model: Module, client_data: DataLoader, train_fn: Callable, """Runs InvertingGradients.""" attack = InvertingGradients(model, client_data, train_fn, data_mean, data_std, config) result = attack.run_attack() - result.prepare_privacy_risk_report(experiment_name, "./leakpro_output/results") + result.save(name=experiment_name, path="./leakpro_output/results") + return result def run_inverting_audit(model: Module, dataset: Dataset, train_fn: Callable, data_mean: torch.Tensor, data_std: torch.Tensor diff --git a/leakpro/synthetic_data_attacks/inference_utils.py b/leakpro/synthetic_data_attacks/inference_utils.py index d5915350..df35d694 100755 --- a/leakpro/synthetic_data_attacks/inference_utils.py +++ b/leakpro/synthetic_data_attacks/inference_utils.py @@ -1,14 +1,16 @@ """Inference risk util functions.""" import itertools +import json import math +import os import random -from typing import List, Union from pandas import DataFrame from pydantic import BaseModel from leakpro.synthetic_data_attacks.anonymeter.evaluators.inference_evaluator import InferenceEvaluator from leakpro.synthetic_data_attacks.utils import load_res_json_file, save_res_json_file +from leakpro.utils.import_helper import List, Self, Union class InferenceResults(BaseModel): @@ -31,6 +33,101 @@ class InferenceResults(BaseModel): res: List[List[Union[int,float]]] aux_cols: List[List[str]] secrets: List[str] + worst_case_flag: bool + + def save(self:Self, + path: str = "../leakpro_output/results/", + name: str = "inference", + case_flag:str = "base", # noqa: ARG002 + config: dict = None # noqa: ARG002 + ) -> None: + """Save method for InferenceResults.""" + + # Data to be saved + data = { + "resulttype": self.__class__.__name__, + "resultname": name, + "res": self.model_dump(), + } + + # Check if path exists, otherwise create it. 
+ for _ in range(3): + if os.path.exists(path): + break + path = "../" + path + + # If no result folder can be found + if not os.path.exists(path): + os.makedirs("../../leakpro_output/results/") + + # Save the results to a file + if not os.path.exists(f"{path}/inference_risk/{name}"): + os.makedirs(f"{path}/inference_risk/{name}") + + with open(f"{path}/inference_risk/{name}/data.json", "w") as f: + json.dump(data, f) + + + self.plot(worst_case_flag=self.worst_case_flag, + show=False, + save=True, + save_path=f"{path}/inference_risk/{name}", + save_name=name + ) + + @staticmethod + def load(data: dict) -> "InferenceResults": + """Load method for InferenceResults.""" + return InferenceResults(res=data["res"]["res"], + res_cols=data["res"]["res_cols"], + aux_cols=data["res"]["aux_cols"], + secrets=data["res"]["secrets"], + worst_case_flag=data["res"]["worst_case_flag"]) + + def plot(self:Self, + high_res_flag: bool = False, + worst_case_flag: bool = False, + show: bool = True, + save: bool = False, + save_path: str = "./", + save_name: str = "fig.png" + ) -> None: + """Plot method for InferenceResults.""" + from leakpro.synthetic_data_attacks.plots import plot_ir_base_case, plot_ir_worst_case + + plot_inference = plot_ir_worst_case if worst_case_flag else plot_ir_base_case + plot_inference(inf_res=InferenceResults(res=self.res, + res_cols=self.res_cols, + aux_cols=self.aux_cols, + secrets=self.secrets, + worst_case_flag=self.worst_case_flag), + high_res_flag=high_res_flag, + show=show, + save=save, + save_name=f"{save_path}/{save_name}") + + @staticmethod + def create_results(results: list, save_dir: str = "./") -> str: + """Result method for InferenceResults.""" + latex = "" + + def _latex(save_dir: str, save_name: str) -> str: + """Latex method for InferenceResults.""" + filename = f"{save_dir}/{save_name}.png" + return f""" + \\subsection{{{" ".join(save_name.split("_"))}}} + \\begin{{figure}}[ht] + \\includegraphics[width=0.8\\textwidth]{{{filename}}} + 
\\caption{{Original}} + \\end{{figure}} + """ + + for res in results: + name = "inference_"+("worst_case" if res.worst_case_flag else "base_case") + + res.plot(show=False, save=True, save_path=save_dir, save_name=name) + latex += _latex(save_dir=save_dir, save_name=name) + return latex def get_inference_prefix(*, worst_case_flag: bool) -> str: """Auxiliary function to get inference prefix used in json results filename.""" @@ -166,7 +263,8 @@ def inference_risk_evaluation( res_cols = res_cols, res = res, aux_cols = aux_cols_all, - secrets = secrets + secrets = secrets, + worst_case_flag = worst_case_flag ) #Save results to json if save_results_json: diff --git a/leakpro/synthetic_data_attacks/linkability_utils.py b/leakpro/synthetic_data_attacks/linkability_utils.py index 27c33bca..da482c35 100755 --- a/leakpro/synthetic_data_attacks/linkability_utils.py +++ b/leakpro/synthetic_data_attacks/linkability_utils.py @@ -1,7 +1,8 @@ """Linkability risk util functions.""" import itertools +import json import math -from typing import List, Union +import os import numpy as np from pandas import DataFrame @@ -9,6 +10,7 @@ from leakpro.synthetic_data_attacks.anonymeter.evaluators.linkability_evaluator import LinkabilityEvaluator from leakpro.synthetic_data_attacks.utils import load_res_json_file, save_res_json_file +from leakpro.utils.import_helper import List, Self, Union def aux_assert_input_values_get_combs_2_buckets(*, cols: List, buck1_nr: int, buck2_nr: int) -> None: @@ -193,6 +195,81 @@ class LinkabilityResults(BaseModel): res: List[List[Union[int,float]]] aux_cols: List[List[List[str]]] + def save(self:Self, path: str = "../leakpro_output/results/", name: str = "linkability", config: dict = None) -> None: # noqa: ARG002 + """Save method for LinkabilityResults.""" + + id = "linkability" + + # Data to be saved + data = { + "resulttype": self.__class__.__name__, + "resultname": name, + "res": self.model_dump(), + "id": id, + } + + # Check if path exists, otherwise create 
it. + for _ in range(3): + if os.path.exists(path): + break + path = "../" + path + + # If no result folder can be found + if not os.path.exists(path): + os.makedirs("../../leakpro_output/results/") + + # Save the results to a file + if not os.path.exists(f"{path}/{name}/{id}"): + os.makedirs(f"{path}/{name}/{id}") + + with open(f"{path}/{name}/{id}/data.json", "w") as f: + json.dump(data, f) + + self.plot(show=False, + save=True, + save_path=f"{path}", + save_name=f"{name}/{id}/{name}") + + @staticmethod + def load(data: dict) -> "LinkabilityResults": + """Load method for LinkabilityResults.""" + return LinkabilityResults(res=data["res"]["res"], + res_cols=data["res"]["res_cols"], + aux_cols=data["res"]["aux_cols"]) + + def plot(self:Self, high_res_flag: bool = False, show: bool = True, save: bool = False, + save_path: str = "./", save_name: str = "fig.png") -> None: + """Plot method for LinkabilityResults.""" + from leakpro.synthetic_data_attacks.plots import plot_linkability + plot_linkability(link_res=LinkabilityResults(res=self.res, + res_cols=self.res_cols, + aux_cols=self.aux_cols), + high_res_flag=high_res_flag, + show=show, + save=save, + save_name=f"{save_path}/{save_name}") + + @staticmethod + def create_results(results: list, save_dir: str = "./") -> str: + """Result method for LinkabilityResults.""" + latex = "" + + def _latex(save_dir: str, save_name: str) -> str: + """Latex method for LinkabilityResults.""" + filename = f"{save_dir}/{save_name}.png" + return f""" + \\subsection{{{" ".join(save_name.split("_"))}}} + \\begin{{figure}}[ht] + \\includegraphics[width=0.8\\textwidth]{{{filename}}} + \\caption{{Original}} + \\end{{figure}} + """ + + for res in results: + res.plot(show=False, save=True, save_path=save_dir, save_name="linkability") + latex += _latex(save_dir=save_dir, save_name="linkability") + return latex + def linkability_risk_evaluation( ori: DataFrame, syn: DataFrame, diff --git a/leakpro/synthetic_data_attacks/plots.py 
b/leakpro/synthetic_data_attacks/plots.py index 4540f474..6293c0b9 100755 --- a/leakpro/synthetic_data_attacks/plots.py +++ b/leakpro/synthetic_data_attacks/plots.py @@ -82,7 +82,13 @@ def iterate_values_plot_bar_charts(*, max_value = res[:, 4:7].max() ax.set_ylim(0, max_value * 1.05) -def plot_linkability(*, link_res: LinkabilityResults, high_res_flag: bool = True) -> None: +def plot_linkability(*, + link_res: LinkabilityResults, + high_res_flag: bool = True, + show: bool = True, + save: bool = False, + save_name: str = None + ) -> None: """Function to plot linkability results from given res. Note: function is not tested and is used in examples. @@ -108,10 +114,22 @@ def plot_linkability(*, link_res: LinkabilityResults, high_res_flag: bool = True set_ticks(ax=ax, xlabels=set_nr_aux_cols) # Adding legend set_legend(ax=ax) + # Save and or show figure + if save: + plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") # Show plot - plt.show() + if show: + plt.show() + else: + plt.clf() -def plot_ir_worst_case(*, inf_res: InferenceResults, high_res_flag: bool = True) -> None: +def plot_ir_worst_case(*, + inf_res: InferenceResults, + high_res_flag: bool = True, + show: bool = True, + save: bool = False, + save_name: str = None + ) -> None: """Function to plot inference results worst case given results. Note: function is not tested and is used in examples. 
@@ -144,10 +162,22 @@ def plot_ir_worst_case(*, inf_res: InferenceResults, high_res_flag: bool = True) set_ticks(ax=ax, xlabels=set_secrets) # Adding legend set_legend(ax=ax) + # Save and or show figure + if save: + plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") # Show plot - plt.show() + if show: + plt.show() + else: + plt.clf() -def plot_ir_base_case(*, inf_res: InferenceResults, high_res_flag: bool = True) -> None: +def plot_ir_base_case(*, + inf_res: InferenceResults, + high_res_flag: bool = True, + show: bool = True, + save: bool = False, + save_name: str = None + ) -> None: """Function to plot inference results base case given results. Note: function is not tested and is used in examples. @@ -187,14 +217,21 @@ def plot_ir_base_case(*, inf_res: InferenceResults, high_res_flag: bool = True) # Adding legend set_legend(ax=ax) plt.tight_layout() - plt.show() + # Save and or show figure + if save: + plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") + # Show plot + if show: + plt.show() + else: + plt.clf() def plot_singling_out(*, - sin_out_res: SinglingOutResults, - high_res_flag: bool = True, - show: bool = True, - save: bool = False, - save_name: str = None + sin_out_res: SinglingOutResults, + high_res_flag: bool = True, + show: bool = True, + save: bool = False, + save_name: str = None ) -> None: """Function to plot singling out given results. 
@@ -231,7 +268,7 @@ def plot_singling_out(*, set_ticks(ax=ax, xlabels=set_n_cols) # Adding legend set_legend(ax=ax) - + # Save and or show figure if save: plt.savefig(fname=f"{save_name}.png", dpi=1000, bbox_inches="tight") # Show plot diff --git a/leakpro/synthetic_data_attacks/singling_out_utils.py b/leakpro/synthetic_data_attacks/singling_out_utils.py index f3c302ef..b2c7b13a 100755 --- a/leakpro/synthetic_data_attacks/singling_out_utils.py +++ b/leakpro/synthetic_data_attacks/singling_out_utils.py @@ -3,13 +3,13 @@ import multiprocessing as mp import os from itertools import repeat -from typing import Any, Callable, Dict, List, Optional, Self, Tuple, Union from pandas import DataFrame from pydantic import BaseModel from leakpro.synthetic_data_attacks.anonymeter.evaluators.singling_out_evaluator import SinglingOutEvaluator from leakpro.synthetic_data_attacks.utils import load_res_json_file, save_res_json_file +from leakpro.utils.import_helper import Any, Callable, Dict, List, Optional, Self, Tuple, Union class SinglingOutResults(BaseModel): diff --git a/leakpro/synthetic_data_attacks/utils.py b/leakpro/synthetic_data_attacks/utils.py index 80046150..f3ea7509 100755 --- a/leakpro/synthetic_data_attacks/utils.py +++ b/leakpro/synthetic_data_attacks/utils.py @@ -3,28 +3,28 @@ import os from typing import Tuple -#Path to save results -PATH_RESULTS = os.path.dirname(os.path.dirname(__file__)) + "/synthetic_data_attacks/results/" +# Default path to save results +DEFAULT_PATH_RESULTS = os.path.dirname(os.path.dirname(__file__)) + "/synthetic_data_attacks/results/" -def aux_file_path(*, prefix: str, dataset: str) -> Tuple[str,str]: +def aux_file_path(*, path: str = None, prefix: str, dataset: str) -> Tuple[str, str]: """Util function that returns file and file_path for given prefix and dataset.""" if prefix: prefix += "_" file = "res_" + prefix + dataset + ".json" - file_path = PATH_RESULTS + file + file_path = os.path.join(path or DEFAULT_PATH_RESULTS, file) return 
file, file_path -def save_res_json_file(*, prefix: str, dataset: str, res: dict) -> None: +def save_res_json_file(*, path: str = None, prefix: str, dataset: str, res: dict) -> None: """Util function that saves results dictionary into a json file with given prefix and dataset name.""" - file, file_path = aux_file_path(prefix=prefix, dataset=dataset) - #Create directory if does not exist - os.makedirs(os.path.dirname(PATH_RESULTS), exist_ok=True) + file, file_path = aux_file_path(path=path, prefix=prefix, dataset=dataset) + # Create directory if it does not exist + os.makedirs(os.path.dirname(file_path), exist_ok=True) with open(file_path, "w") as f: json.dump(res, f, indent=4) - print("\n### Results saved!", file) # noqa: T201 + print("\n### Results saved!", file) # noqa: T201 -def load_res_json_file(*, prefix: str, dataset: str) -> dict: +def load_res_json_file(*, path: str = None, prefix: str, dataset: str) -> dict: """Util function that loads and returns results from json file with given prefix and dataset name.""" - _, file_path = aux_file_path(prefix=prefix, dataset=dataset) + _, file_path = aux_file_path(path=path, prefix=prefix, dataset=dataset) with open(file_path, "r") as f: return json.load(f) From 54815abfcde4afa2a458e5af9c9088b484e55f01 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Thu, 5 Dec 2024 10:26:13 +0000 Subject: [PATCH 11/14] fixed gia report_handler --- examples/report_handler/report_handler.ipynb | 75 ++++++++++---------- leakpro/run.py | 5 +- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/examples/report_handler/report_handler.ipynb b/examples/report_handler/report_handler.ipynb index 5e713e1d..a5f785a9 100644 --- a/examples/report_handler/report_handler.ipynb +++ b/examples/report_handler/report_handler.ipynb @@ -163,44 +163,41 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-27 08:38:00,656 INFO Inverting gradient initialized.\n", - "2024-11-27 08:38:02,796 INFO Iteration 0, loss 0.0003550234832800925\n", 
- "2024-11-27 08:38:02,803 INFO New best loss: 0.0003550234832800925 on round: 0\n", - "2024-11-27 08:38:02,946 INFO New best loss: 0.00033073360100388527 on round: 1\n", - "2024-11-27 08:38:03,193 INFO New best loss: 0.0003084556374233216 on round: 3\n", - "2024-11-27 08:38:03,552 INFO New best loss: 0.0002839812950696796 on round: 6\n", - "2024-11-27 08:38:03,674 INFO New best loss: 0.00028284190921112895 on round: 7\n", - "2024-11-27 08:38:03,802 INFO New best loss: 0.00027706200489774346 on round: 8\n", - "2024-11-27 08:38:03,924 INFO New best loss: 0.00026298483135178685 on round: 9\n", - "2024-11-27 08:38:04,303 INFO New best loss: 0.00025501404888927937 on round: 12\n", - "2024-11-27 08:38:04,427 INFO New best loss: 0.00025296382955275476 on round: 13\n", - "2024-11-27 08:38:04,677 INFO New best loss: 0.00024899665731936693 on round: 15\n", - "2024-11-27 08:38:04,807 INFO New best loss: 0.0002483891148585826 on round: 16\n", - "2024-11-27 08:38:04,936 INFO New best loss: 0.0002449913590680808 on round: 17\n", - "2024-11-27 08:38:05,063 INFO New best loss: 0.0002429946616757661 on round: 18\n", - "2024-11-27 08:38:05,185 INFO New best loss: 0.00023749274259898812 on round: 19\n", - "2024-11-27 08:38:05,890 INFO New best loss: 0.0002372416784055531 on round: 25\n", - "2024-11-27 08:38:06,015 INFO New best loss: 0.00023295756545849144 on round: 26\n", - "2024-11-27 08:38:06,139 INFO New best loss: 0.00023274944396689534 on round: 27\n", - "2024-11-27 08:38:06,266 INFO New best loss: 0.00023147281899582595 on round: 28\n", - "2024-11-27 08:38:06,394 INFO New best loss: 0.00022923419601283967 on round: 29\n", - "2024-11-27 08:38:06,646 INFO New best loss: 0.00022827980865258723 on round: 31\n", - "2024-11-27 08:38:06,775 INFO New best loss: 0.00022587741841562092 on round: 32\n", - "2024-11-27 08:38:06,907 INFO New best loss: 0.00022527067631017417 on round: 33\n", - "2024-11-27 08:38:08,442 INFO New best loss: 0.00022461664048023522 on round: 45\n", - 
"2024-11-27 08:38:08,569 INFO New best loss: 0.00022421970788855106 on round: 46\n", - "2024-11-27 08:38:08,696 INFO New best loss: 0.0002231039834441617 on round: 47\n", - "2024-11-27 08:38:08,819 INFO New best loss: 0.00022097492183092982 on round: 48\n", - "2024-11-27 08:38:10,106 INFO New best loss: 0.00022094276209827513 on round: 59\n", - "2024-11-27 08:38:10,216 INFO New best loss: 0.00022092672588769346 on round: 60\n", - "2024-11-27 08:38:10,328 INFO New best loss: 0.00022070117120165378 on round: 61\n", - "2024-11-27 08:38:10,434 INFO New best loss: 0.00022059425828047097 on round: 62\n", - "2024-11-27 08:38:10,539 INFO New best loss: 0.00022042241471353918 on round: 63\n", - "2024-11-27 08:38:11,368 INFO New best loss: 0.00022039355826564133 on round: 71\n", - "2024-11-27 08:38:11,472 INFO New best loss: 0.00022023300698492676 on round: 72\n", - "2024-11-27 08:38:11,680 INFO New best loss: 0.0002201125753344968 on round: 74\n", - "2024-11-27 08:38:12,092 INFO New best loss: 0.00021973479306325316 on round: 78\n", - "2024-11-27 08:38:12,195 INFO New best loss: 0.00021972827380523086 on round: 79\n", + "2024-12-05 10:20:59,883 INFO Inverting gradient initialized.\n", + "2024-12-05 10:21:01,974 INFO Iteration 0, loss 0.00037895439891144633\n", + "2024-12-05 10:21:01,981 INFO New best loss: 0.00037895439891144633 on round: 0\n", + "2024-12-05 10:21:02,261 INFO New best loss: 0.00036255631130188704 on round: 2\n", + "2024-12-05 10:21:02,527 INFO New best loss: 0.0003450227959547192 on round: 4\n", + "2024-12-05 10:21:02,669 INFO New best loss: 0.00032889979775063694 on round: 5\n", + "2024-12-05 10:21:02,807 INFO New best loss: 0.00031025282805785537 on round: 6\n", + "2024-12-05 10:21:03,075 INFO New best loss: 0.00029653109959326684 on round: 8\n", + "2024-12-05 10:21:03,212 INFO New best loss: 0.00029343905043788254 on round: 9\n", + "2024-12-05 10:21:03,357 INFO New best loss: 0.0002867175207938999 on round: 10\n", + "2024-12-05 10:21:03,482 INFO New best 
loss: 0.0002853123296517879 on round: 11\n", + "2024-12-05 10:21:03,607 INFO New best loss: 0.00027869627228938043 on round: 12\n", + "2024-12-05 10:21:03,735 INFO New best loss: 0.0002784693206194788 on round: 13\n", + "2024-12-05 10:21:04,092 INFO New best loss: 0.00027562331524677575 on round: 14\n", + "2024-12-05 10:21:05,240 INFO New best loss: 0.0002711182169150561 on round: 23\n", + "2024-12-05 10:21:05,367 INFO New best loss: 0.00026883225655183196 on round: 24\n", + "2024-12-05 10:21:05,622 INFO New best loss: 0.0002568941272329539 on round: 26\n", + "2024-12-05 10:21:05,748 INFO New best loss: 0.0002561017172411084 on round: 27\n", + "2024-12-05 10:21:05,876 INFO New best loss: 0.0002496801607776433 on round: 28\n", + "2024-12-05 10:21:06,517 INFO New best loss: 0.0002473248168826103 on round: 33\n", + "2024-12-05 10:21:06,648 INFO New best loss: 0.0002469118044245988 on round: 34\n", + "2024-12-05 10:21:07,403 INFO New best loss: 0.00024639995535835624 on round: 40\n", + "2024-12-05 10:21:07,532 INFO New best loss: 0.000243801434407942 on round: 41\n", + "2024-12-05 10:21:07,670 INFO New best loss: 0.0002417033538222313 on round: 42\n", + "2024-12-05 10:21:07,803 INFO New best loss: 0.0002388415450695902 on round: 43\n", + "2024-12-05 10:21:07,924 INFO New best loss: 0.00023807109391782433 on round: 44\n", + "2024-12-05 10:21:08,178 INFO New best loss: 0.00023767446691635996 on round: 46\n", + "2024-12-05 10:21:08,302 INFO New best loss: 0.0002361331571592018 on round: 47\n", + "2024-12-05 10:21:08,427 INFO New best loss: 0.00023581282584927976 on round: 48\n", + "2024-12-05 10:21:09,189 INFO New best loss: 0.0002355617325520143 on round: 54\n", + "2024-12-05 10:21:09,321 INFO New best loss: 0.00023450757726095617 on round: 55\n", + "2024-12-05 10:21:09,447 INFO New best loss: 0.0002343545202165842 on round: 56\n", + "2024-12-05 10:21:09,702 INFO New best loss: 0.0002338749181944877 on round: 58\n", + "2024-12-05 10:21:10,466 INFO New best loss: 
0.00023371708812192082 on round: 64\n", + "2024-12-05 10:21:10,595 INFO New best loss: 0.00023358875478152186 on round: 65\n", "/opt/conda/lib/python3.10/site-packages/torchmetrics/utilities/prints.py:70: FutureWarning: Importing `peak_signal_noise_ratio` from `torchmetrics.functional` was deprecated and will be removed in 2.0. Import `peak_signal_noise_ratio` from `torchmetrics.image` instead.\n", " _future_warning(\n" ] @@ -225,7 +222,7 @@ "configs = InvertingConfig()\n", "configs.at_iterations = 80 # Decreased from 8000 to avoid GPU memory crash\n", "\n", - "GIA_result = run_inverting(model, client_dataloader, train_fn, data_mean, data_std, configs)" + "GIA_result = run_inverting(model, client_dataloader, train_fn, data_mean, data_std, configs, save=False)" ] }, { diff --git a/leakpro/run.py b/leakpro/run.py index 80d9551b..797ad7da 100644 --- a/leakpro/run.py +++ b/leakpro/run.py @@ -11,11 +11,12 @@ def run_inverting(model: Module, client_data: DataLoader, train_fn: Callable, - data_mean:Tensor, data_std: Tensor, config: dict, experiment_name: str = "InvertingGradients") -> None: + data_mean:Tensor, data_std: Tensor, config: dict, experiment_name: str = "InvertingGradients", save:bool = False) -> None: """Runs InvertingGradients.""" attack = InvertingGradients(model, client_data, train_fn, data_mean, data_std, config) result = attack.run_attack() - result.save(name=experiment_name, path="./leakpro_output/results") + if save: + result.save(name=experiment_name, path="./leakpro_output/results", config=config) return result def run_inverting_audit(model: Module, dataset: Dataset, From 0f3cbf759b174fa749e6b407a85cbf2daf13035c Mon Sep 17 00:00:00 2001 From: henrikfo Date: Thu, 5 Dec 2024 10:26:36 +0000 Subject: [PATCH 12/14] fixed gia report_handler --- leakpro/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/leakpro/run.py b/leakpro/run.py index 797ad7da..a060f78b 100644 --- a/leakpro/run.py +++ b/leakpro/run.py @@ -11,7 +11,7 @@ def 
run_inverting(model: Module, client_data: DataLoader, train_fn: Callable, - data_mean:Tensor, data_std: Tensor, config: dict, experiment_name: str = "InvertingGradients", save:bool = False) -> None: + data_mean:Tensor, data_std: Tensor, config: dict, experiment_name: str = "InvertingGradients", save:bool = True) -> None: """Runs InvertingGradients.""" attack = InvertingGradients(model, client_data, train_fn, data_mean, data_std, config) result = attack.run_attack() From 59bcec592394a542a767c7cb367964f29523f49b Mon Sep 17 00:00:00 2001 From: henrikfo Date: Tue, 10 Dec 2024 15:41:06 +0000 Subject: [PATCH 13/14] updated notebook with local directory for reporthandler example --- examples/report_handler/report_handler.ipynb | 137 +++++++++---------- leakpro/run.py | 5 +- 2 files changed, 69 insertions(+), 73 deletions(-) diff --git a/examples/report_handler/report_handler.ipynb b/examples/report_handler/report_handler.ipynb index a5f785a9..725e673e 100644 --- a/examples/report_handler/report_handler.ipynb +++ b/examples/report_handler/report_handler.ipynb @@ -61,7 +61,7 @@ "output_type": "stream", "text": [ "[Parallel(n_jobs=64)]: Using backend ThreadingBackend with 64 concurrent workers.\n", - "[Parallel(n_jobs=64)]: Done 2 out of 64 | elapsed: 1.2s remaining: 36.4s\n", + "[Parallel(n_jobs=64)]: Done 2 out of 64 | elapsed: 1.2s remaining: 38.1s\n", "[Parallel(n_jobs=64)]: Done 64 out of 64 | elapsed: 4.4s finished\n" ] }, @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "35aee5a3", "metadata": {}, "outputs": [ @@ -163,41 +163,31 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-12-05 10:20:59,883 INFO Inverting gradient initialized.\n", - "2024-12-05 10:21:01,974 INFO Iteration 0, loss 0.00037895439891144633\n", - "2024-12-05 10:21:01,981 INFO New best loss: 0.00037895439891144633 on round: 0\n", - "2024-12-05 10:21:02,261 INFO New best loss: 0.00036255631130188704 on round: 2\n", - "2024-12-05 
10:21:02,527 INFO New best loss: 0.0003450227959547192 on round: 4\n", - "2024-12-05 10:21:02,669 INFO New best loss: 0.00032889979775063694 on round: 5\n", - "2024-12-05 10:21:02,807 INFO New best loss: 0.00031025282805785537 on round: 6\n", - "2024-12-05 10:21:03,075 INFO New best loss: 0.00029653109959326684 on round: 8\n", - "2024-12-05 10:21:03,212 INFO New best loss: 0.00029343905043788254 on round: 9\n", - "2024-12-05 10:21:03,357 INFO New best loss: 0.0002867175207938999 on round: 10\n", - "2024-12-05 10:21:03,482 INFO New best loss: 0.0002853123296517879 on round: 11\n", - "2024-12-05 10:21:03,607 INFO New best loss: 0.00027869627228938043 on round: 12\n", - "2024-12-05 10:21:03,735 INFO New best loss: 0.0002784693206194788 on round: 13\n", - "2024-12-05 10:21:04,092 INFO New best loss: 0.00027562331524677575 on round: 14\n", - "2024-12-05 10:21:05,240 INFO New best loss: 0.0002711182169150561 on round: 23\n", - "2024-12-05 10:21:05,367 INFO New best loss: 0.00026883225655183196 on round: 24\n", - "2024-12-05 10:21:05,622 INFO New best loss: 0.0002568941272329539 on round: 26\n", - "2024-12-05 10:21:05,748 INFO New best loss: 0.0002561017172411084 on round: 27\n", - "2024-12-05 10:21:05,876 INFO New best loss: 0.0002496801607776433 on round: 28\n", - "2024-12-05 10:21:06,517 INFO New best loss: 0.0002473248168826103 on round: 33\n", - "2024-12-05 10:21:06,648 INFO New best loss: 0.0002469118044245988 on round: 34\n", - "2024-12-05 10:21:07,403 INFO New best loss: 0.00024639995535835624 on round: 40\n", - "2024-12-05 10:21:07,532 INFO New best loss: 0.000243801434407942 on round: 41\n", - "2024-12-05 10:21:07,670 INFO New best loss: 0.0002417033538222313 on round: 42\n", - "2024-12-05 10:21:07,803 INFO New best loss: 0.0002388415450695902 on round: 43\n", - "2024-12-05 10:21:07,924 INFO New best loss: 0.00023807109391782433 on round: 44\n", - "2024-12-05 10:21:08,178 INFO New best loss: 0.00023767446691635996 on round: 46\n", - "2024-12-05 10:21:08,302 INFO 
New best loss: 0.0002361331571592018 on round: 47\n", - "2024-12-05 10:21:08,427 INFO New best loss: 0.00023581282584927976 on round: 48\n", - "2024-12-05 10:21:09,189 INFO New best loss: 0.0002355617325520143 on round: 54\n", - "2024-12-05 10:21:09,321 INFO New best loss: 0.00023450757726095617 on round: 55\n", - "2024-12-05 10:21:09,447 INFO New best loss: 0.0002343545202165842 on round: 56\n", - "2024-12-05 10:21:09,702 INFO New best loss: 0.0002338749181944877 on round: 58\n", - "2024-12-05 10:21:10,466 INFO New best loss: 0.00023371708812192082 on round: 64\n", - "2024-12-05 10:21:10,595 INFO New best loss: 0.00023358875478152186 on round: 65\n", + "2024-12-10 15:37:19,407 INFO Inverting gradient initialized.\n", + "2024-12-10 15:37:21,549 INFO Iteration 0, loss 0.00017312286945525557\n", + "2024-12-10 15:37:21,558 INFO New best loss: 0.00017312286945525557 on round: 0\n", + "2024-12-10 15:37:21,716 INFO New best loss: 0.00015130748215597123 on round: 1\n", + "2024-12-10 15:37:21,865 INFO New best loss: 0.00014282172196544707 on round: 2\n", + "2024-12-10 15:37:22,004 INFO New best loss: 0.0001371056423522532 on round: 3\n", + "2024-12-10 15:37:22,146 INFO New best loss: 0.00013112256419844925 on round: 4\n", + "2024-12-10 15:37:22,404 INFO New best loss: 0.00012842075375374407 on round: 6\n", + "2024-12-10 15:37:22,531 INFO New best loss: 0.00012392438657116145 on round: 7\n", + "2024-12-10 15:37:22,661 INFO New best loss: 0.00011872354662045836 on round: 8\n", + "2024-12-10 15:37:22,915 INFO New best loss: 0.00011686021025525406 on round: 10\n", + "2024-12-10 15:37:23,045 INFO New best loss: 0.00011540275590959936 on round: 11\n", + "2024-12-10 15:37:23,176 INFO New best loss: 0.00010958370694424957 on round: 12\n", + "2024-12-10 15:37:23,306 INFO New best loss: 0.00010606442810967565 on round: 13\n", + "2024-12-10 15:37:23,432 INFO New best loss: 0.00010551762534305453 on round: 14\n", + "2024-12-10 15:37:23,558 INFO New best loss: 0.00010321227455278859 on 
round: 15\n", + "2024-12-10 15:37:23,688 INFO New best loss: 0.00010049617412732914 on round: 16\n", + "2024-12-10 15:37:23,819 INFO New best loss: 9.98983159661293e-05 on round: 17\n", + "2024-12-10 15:37:24,073 INFO New best loss: 9.980545291909948e-05 on round: 19\n", + "2024-12-10 15:37:24,330 INFO New best loss: 9.797140228329226e-05 on round: 21\n", + "2024-12-10 15:37:24,457 INFO New best loss: 9.713656618259847e-05 on round: 22\n", + "2024-12-10 15:37:29,775 INFO New best loss: 9.707298158900812e-05 on round: 63\n", + "2024-12-10 15:37:31,176 INFO New best loss: 9.695763583295047e-05 on round: 75\n", + "2024-12-10 15:37:31,299 INFO New best loss: 9.695166954770684e-05 on round: 76\n", + "2024-12-10 15:37:31,425 INFO New best loss: 9.689731814432889e-05 on round: 77\n", "/opt/conda/lib/python3.10/site-packages/torchmetrics/utilities/prints.py:70: FutureWarning: Importing `peak_signal_noise_ratio` from `torchmetrics.functional` was deprecated and will be removed in 2.0. Import `peak_signal_noise_ratio` from `torchmetrics.image` instead.\n", " _future_warning(\n" ] @@ -222,7 +212,8 @@ "configs = InvertingConfig()\n", "configs.at_iterations = 80 # Decreased from 8000 to avoid GPU memory crash\n", "\n", - "GIA_result = run_inverting(model, client_dataloader, train_fn, data_mean, data_std, configs, save=False)" + "name = \"my_gia_results\"\n", + "GIA_result = run_inverting(model, client_dataloader, train_fn, data_mean, data_std, configs, experiment_name=name, save=True)" ] }, { @@ -292,7 +283,7 @@ " warnings.warn(\n", "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. 
The current behavior is equivalent to passing `weights=None`.\n", " warnings.warn(msg)\n", - "Training Progress: 100%|██████████| 3/3 [00:13<00:00, 4.39s/it]\n" + "Training Progress: 100%|██████████| 3/3 [00:13<00:00, 4.60s/it]\n" ] } ], @@ -332,8 +323,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-27 08:38:27,995 INFO Target model blueprint created from ResNet18 in ./mia_utils/utils/cifar_model_preparation.py.\n", - "2024-11-27 08:38:27,997 INFO Loaded target model metadata from ./target/model_metadata.pkl\n" + "2024-12-10 15:37:48,180 INFO Target model blueprint created from ResNet18 in ./mia_utils/utils/cifar_model_preparation.py.\n", + "2024-12-10 15:37:48,183 INFO Loaded target model metadata from ./target/model_metadata.pkl\n" ] }, { @@ -347,26 +338,26 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-27 08:38:28,210 INFO Loaded target model from ./target\n", - "2024-11-27 08:38:29,306 INFO Loaded population dataset from ./data/cifar10.pkl\n", - "2024-11-27 08:38:29,306 INFO Loaded population dataset from ./data/cifar10.pkl\n", - "2024-11-27 08:38:29,307 INFO Creating shadow model handler singleton\n", - "2024-11-27 08:38:29,308 INFO Creating distillation model handler singleton\n", - "2024-11-27 08:38:29,310 INFO Configuring the Population attack\n", - "2024-11-27 08:38:29,310 INFO Added attack: population\n", - "2024-11-27 08:38:29,311 INFO Preparing attack: population\n", - "2024-11-27 08:38:29,312 INFO Preparing attack data for training the Population attack\n", - "2024-11-27 08:38:29,316 INFO Subsampling attack data from 24000 points\n", - "2024-11-27 08:38:29,317 INFO Number of attack data points after subsampling: 24000\n", - "2024-11-27 08:38:29,318 INFO Computing signals for the Population attack\n", - "Getting loss for model 1/ 1: 100%|██████████| 750/750 [00:12<00:00, 62.27it/s]\n", - "2024-11-27 08:38:41,410 INFO Running attack: population\n", - "2024-11-27 08:38:41,415 INFO Running the Population attack on 
the target model\n", - "Getting loss for model 1/ 1: 100%|██████████| 1125/1125 [00:18<00:00, 62.04it/s]\n", - "2024-11-27 08:38:59,662 INFO Attack completed\n", - "2024-11-27 08:38:59,674 INFO Finished attack: population\n", - "2024-11-27 08:38:59,675 INFO Preparing results for attack: population\n", - "2024-11-27 08:38:59,675 INFO Auditing completed\n" + "2024-12-10 15:37:48,394 INFO Loaded target model from ./target\n", + "2024-12-10 15:37:49,289 INFO Loaded population dataset from ./data/cifar10.pkl\n", + "2024-12-10 15:37:49,290 INFO Loaded population dataset from ./data/cifar10.pkl\n", + "2024-12-10 15:37:49,291 INFO Creating shadow model handler singleton\n", + "2024-12-10 15:37:49,294 INFO Creating distillation model handler singleton\n", + "2024-12-10 15:37:49,296 INFO Configuring the Population attack\n", + "2024-12-10 15:37:49,297 INFO Added attack: population\n", + "2024-12-10 15:37:49,298 INFO Preparing attack: population\n", + "2024-12-10 15:37:49,299 INFO Preparing attack data for training the Population attack\n", + "2024-12-10 15:37:49,306 INFO Subsampling attack data from 24000 points\n", + "2024-12-10 15:37:49,307 INFO Number of attack data points after subsampling: 24000\n", + "2024-12-10 15:37:49,308 INFO Computing signals for the Population attack\n", + "Getting loss for model 1/ 1: 100%|██████████| 750/750 [00:12<00:00, 60.59it/s]\n", + "2024-12-10 15:38:01,752 INFO Running attack: population\n", + "2024-12-10 15:38:01,758 INFO Running the Population attack on the target model\n", + "Getting loss for model 1/ 1: 100%|██████████| 1125/1125 [00:17<00:00, 63.04it/s]\n", + "2024-12-10 15:38:19,692 INFO Attack completed\n", + "2024-12-10 15:38:19,703 INFO Finished attack: population\n", + "2024-12-10 15:38:19,703 INFO Preparing results for attack: population\n", + "2024-12-10 15:38:19,704 INFO Auditing completed\n" ] } ], @@ -395,14 +386,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-27 08:38:59,693 INFO Initializing report 
handler...\n", - "2024-11-27 08:38:59,693 INFO report_dir set to: ../../leakpro_output/results\n", - "2024-11-27 08:38:59,694 INFO Saving results for singling_out\n", - "2024-11-27 08:39:01,779 INFO Saving results for linkability_risk\n", - "2024-11-27 08:39:03,748 INFO Saving results for inference_risk_base\n", - "2024-11-27 08:39:08,623 INFO Saving results for inference_risk_worst\n", - "2024-11-27 08:39:10,925 INFO Saving results for gia\n", - "2024-11-27 08:39:10,939 INFO Saving results for population\n" + "2024-12-10 15:38:19,719 INFO Initializing report handler...\n", + "2024-12-10 15:38:19,719 INFO report_dir set to: ./leakpro_output/results\n", + "2024-12-10 15:38:19,720 INFO Saving results for singling_out\n", + "2024-12-10 15:38:21,860 INFO Saving results for linkability_risk\n", + "2024-12-10 15:38:23,763 INFO Saving results for inference_risk_base\n", + "2024-12-10 15:38:28,190 INFO Saving results for inference_risk_worst\n", + "2024-12-10 15:38:30,418 INFO Saving results for gia\n", + "2024-12-10 15:38:30,431 INFO Saving results for population\n" ] }, { @@ -448,7 +439,11 @@ "\n", "# Import and initialize ReportHandler\n", "from leakpro.reporting.report_handler import ReportHandler\n", - "report_handler = ReportHandler()\n", + "\n", + "# Set report_dir to \"./leakpro_output/results\" to save the results to a local results folder\n", + "# or don't use the report_dir argument to let the ReportHandler find an already\n", + "# existing results folder\n", + "report_handler = ReportHandler(report_dir=\"./leakpro_output/results\")\n", "\n", "# # Save Synthetic results using the ReportHandler\n", "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)\n", @@ -474,7 +469,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-27 08:40:04,818 INFO PDF compiled\n" + "2024-12-10 15:38:59,171 INFO PDF compiled\n" ] }, { diff --git a/leakpro/run.py b/leakpro/run.py index a060f78b..8abb771d 100644 --- a/leakpro/run.py +++
b/leakpro/run.py @@ -11,12 +11,13 @@ def run_inverting(model: Module, client_data: DataLoader, train_fn: Callable, - data_mean:Tensor, data_std: Tensor, config: dict, experiment_name: str = "InvertingGradients", save:bool = True) -> None: + data_mean:Tensor, data_std: Tensor, config: dict, experiment_name: str = "InvertingGradients", + path:str = "./leakpro_output/results", save:bool = True) -> None: """Runs InvertingGradients.""" attack = InvertingGradients(model, client_data, train_fn, data_mean, data_std, config) result = attack.run_attack() if save: - result.save(name=experiment_name, path="./leakpro_output/results", config=config) + result.save(name=experiment_name, path=path, config=config) return result def run_inverting_audit(model: Module, dataset: Dataset, From 0468bc85440e49d304f928f5315b9f04d57b0419 Mon Sep 17 00:00:00 2001 From: henrikfo Date: Wed, 11 Dec 2024 13:48:09 +0000 Subject: [PATCH 14/14] Removed duplicate gia files --- examples/report_handler/gia_utils/cifar.py | 26 - examples/report_handler/gia_utils/model.py | 81 --- examples/report_handler/mia_utils/audit.yaml | 65 ++- examples/report_handler/report_handler.ipynb | 496 ++++++++++++------- leakpro/metrics/attack_result.py | 2 +- 5 files changed, 359 insertions(+), 311 deletions(-) delete mode 100644 examples/report_handler/gia_utils/cifar.py delete mode 100644 examples/report_handler/gia_utils/model.py diff --git a/examples/report_handler/gia_utils/cifar.py b/examples/report_handler/gia_utils/cifar.py deleted file mode 100644 index 98ee9490..00000000 --- a/examples/report_handler/gia_utils/cifar.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Module with functions for preparing the dataset for training the target models.""" -import torchvision -from torch import as_tensor, randperm -from torch.utils.data import DataLoader, Subset, TensorDataset -from torchvision import transforms - -from leakpro.fl_utils.data_utils import get_meanstd - - -def get_cifar10_loader(num_images:int =1, batch_size:int = 1, 
num_workers:int = 2 ) -> TensorDataset: - """Get the full dataset for CIFAR10.""" - trainset = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=transforms.ToTensor()) - data_mean, data_std = get_meanstd(trainset) - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(data_mean, data_std)]) - trainset.transform = transform - - total_examples = len(trainset) - random_indices = randperm(total_examples)[:num_images] - subset_trainset = Subset(trainset, random_indices) - trainloader = DataLoader(subset_trainset, batch_size=batch_size, - shuffle=False, drop_last=True, num_workers=num_workers) - data_mean = as_tensor(data_mean)[:, None, None] - data_std = as_tensor(data_std)[:, None, None] - return trainloader, data_mean, data_std diff --git a/examples/report_handler/gia_utils/model.py b/examples/report_handler/gia_utils/model.py deleted file mode 100644 index 403ff164..00000000 --- a/examples/report_handler/gia_utils/model.py +++ /dev/null @@ -1,81 +0,0 @@ -"""ResNet model.""" -from typing import Optional - -import torch -import torchvision -from torch import nn -from torchvision.models.resnet import BasicBlock, Bottleneck - -from leakpro.utils.import_helper import Self - - -class ResNet(torchvision.models.ResNet): - """ResNet generalization for CIFAR thingies.""" - - def __init__(self: Self, block: BasicBlock, layers: list, num_classes: int=10, zero_init_residual: bool=False, # noqa: C901 - groups: int=1, base_width: int=64, replace_stride_with_dilation: list=None, - norm_layer: Optional[nn.Module]=None, strides: list=[1, 2, 2, 2], pool: str="avg") -> None: # noqa: B006 - """Initialize as usual. 
Layers and strides are scriptable.""" - super(torchvision.models.ResNet, self).__init__() # nn.Module - if norm_layer is None: - norm_layer = nn.BatchNorm2d - self._norm_layer = norm_layer - - - self.dilation = 1 - if replace_stride_with_dilation is None: - # each element in the tuple indicates if we should replace - # the 2x2 stride with a dilated convolution instead - replace_stride_with_dilation = [False, False, False, False] - if len(replace_stride_with_dilation) != 4: - raise ValueError("replace_stride_with_dilation should be None " - "or a 4-element tuple, got {}".format(replace_stride_with_dilation)) - self.groups = groups - - self.inplanes = base_width - self.base_width = 64 # Do this to circumvent BasicBlock errors. The value is not actually used. - self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) - self.bn1 = norm_layer(self.inplanes) - self.relu = nn.ReLU(inplace=True) - - self.layers = torch.nn.ModuleList() - width = self.inplanes - for idx, layer in enumerate(layers): - self.layers.append(self._make_layer(block, width, layer, stride=strides[idx], dilate=replace_stride_with_dilation[idx])) - width *= 2 - - self.pool = nn.AdaptiveAvgPool2d((1, 1)) if pool == "avg" else nn.AdaptiveMaxPool2d((1, 1)) - self.fc = nn.Linear(width // 2 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") - elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - # Zero-initialize the last BN in each residual branch, - # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
- # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 - if zero_init_residual: - for m in self.modules(): - if isinstance(m, Bottleneck): - nn.init.constant_(m.bn3.weight, 0) - elif isinstance(m, torchvision.models.resnet.BasicBlock): - nn.init.constant_(m.bn2.weight, 0) - - - def _forward_impl(self: Self, x: torch.Tensor) -> None: - # See note [TorchScript super()] - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - - for layer in self.layers: - x = layer(x) - - x = self.pool(x) - x = torch.flatten(x, 1) - x = self.fc(x) - - return x diff --git a/examples/report_handler/mia_utils/audit.yaml b/examples/report_handler/mia_utils/audit.yaml index 073fe7f1..0fcd5558 100644 --- a/examples/report_handler/mia_utils/audit.yaml +++ b/examples/report_handler/mia_utils/audit.yaml @@ -1,43 +1,38 @@ audit: # Configurations for auditing random_seed: 1234 # Integer specifying the random seed attack_list: - # rmia: - # training_data_fraction: 0.5 # Fraction of the auxilary dataset to use for this attack (in each shadow model training) - # attack_data_fraction: 0.5 # Fraction of auxiliary dataset to sample from during attack - # num_shadow_models: 3 # Number of shadow models to train - # online: True # perform online or offline attack - # temperature: 2 - # gamma: 2.0 - # offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b. 
- # offline_b: 0.66 - # qmia: - # training_data_fraction: 1.0 # Fraction of the auxilary dataset (data without train and test indices) to use for training the quantile regressor - # epochs: 5 # Number of training epochs for quantile regression + rmia: + training_data_fraction: 0.5 # Fraction of the auxilary dataset to use for this attack (in each shadow model training) + attack_data_fraction: 0.5 # Fraction of auxiliary dataset to sample from during attack + num_shadow_models: 3 # Number of shadow models to train + online: True # perform online or offline attack + temperature: 2 + gamma: 2.0 + offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b. + offline_b: 0.66 population: attack_data_fraction: 1.0 # Fraction of the auxilary dataset to use for this attack - # lira: - # training_data_fraction: 0.5 # Fraction of the auxilary dataset to use for this attack (in each shadow model training) - # num_shadow_models: 3 # Number of shadow models to train - # online: False # perform online or offline attack - # fixed_variance: True # Use a fixed variance for the whole audit - # boosting: True - # loss_traj: - # training_distill_data_fraction : 0.7 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2 - # number_of_traj: 10 # Number of epochs (number of points in the loss trajectory) - # label_only: False # True or False - # mia_classifier_epochs: 100 - # HSJ: - # attack_data_fraction: 0.01 # Fraction of the auxilary dataset to use for this attack - # target_metadata_path: "./target/model_metadata.pkl" - # num_iterations: 2 # Number of iterations for the optimization - # initial_num_evals: 100 # Number of evaluations for number of random vecotr to estimate the gradient - # max_num_evals: 10000 # Maximum number of evaluations - # stepsize_search: "geometric_progression" # Step size search method - # gamma: 1.0 # Gamma for the optimization - # constraint: 2 - # batch_size: 50 - # 
verbose: True - # epsilon_threshold: 1e-6 + lira: + training_data_fraction: 0.5 # Fraction of the auxiliary dataset to use for this attack (in each shadow model training) + num_shadow_models: 3 # Number of shadow models to train + online: True # perform online or offline attack + loss_traj: + training_distill_data_fraction : 0.7 # Fraction of the auxiliary dataset to use for training the distillation models D_s = (1-D_KD)/2 + number_of_traj: 10 # Number of epochs (number of points in the loss trajectory) + label_only: False # True or False + mia_classifier_epochs: 100 + HSJ: + attack_data_fraction: 0.01 # Fraction of the auxiliary dataset to use for this attack + target_metadata_path: "./target/model_metadata.pkl" + num_iterations: 2 # Number of iterations for the optimization + initial_num_evals: 100 # Number of evaluations for number of random vectors to estimate the gradient + max_num_evals: 10000 # Maximum number of evaluations + stepsize_search: "geometric_progression" # Step size search method + gamma: 1.0 # Gamma for the optimization + constraint: 2 + batch_size: 50 + verbose: True + epsilon_threshold: 1.0e-6 output_dir: "./leakpro_output" attack_type: "mia" #mia, gia diff --git a/examples/report_handler/report_handler.ipynb b/examples/report_handler/report_handler.ipynb index 725e673e..94cae90a 100644 --- a/examples/report_handler/report_handler.ipynb +++ b/examples/report_handler/report_handler.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "bcf529c7-8bfe-49da-9889-59111ec2cd73", "metadata": {}, "outputs": [], @@ -52,28 +52,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "c89f3738", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Parallel(n_jobs=64)]: Using backend ThreadingBackend with 64 concurrent workers.\n", - "[Parallel(n_jobs=64)]: Done 2 out of 64 | elapsed: 1.2s remaining: 38.1s\n", -
"[Parallel(n_jobs=64)]: Done 64 out of 64 | elapsed: 4.4s finished\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unique predictions (array([-1, 1]), array([ 3, 97]))\n", - "Syn anom shape (3, 14)\n" - ] - } - ], + "outputs": [], "source": [ "syn_anom = return_anomalies(df=syn, n_estimators=1000, n_jobs=-1, verbose=True)\n", "print(\"Syn anom shape\",syn_anom.shape)" @@ -81,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "ad69ece9", "metadata": {}, "outputs": [], @@ -97,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "7d7ffb5a", "metadata": {}, "outputs": [], @@ -114,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "0a5c20e2", "metadata": {}, "outputs": [], @@ -148,54 +130,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "35aee5a3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files already downloaded and verified\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-12-10 15:37:19,407 INFO Inverting gradient initialized.\n", - "2024-12-10 15:37:21,549 INFO Iteration 0, loss 0.00017312286945525557\n", - "2024-12-10 15:37:21,558 INFO New best loss: 0.00017312286945525557 on round: 0\n", - "2024-12-10 15:37:21,716 INFO New best loss: 0.00015130748215597123 on round: 1\n", - "2024-12-10 15:37:21,865 INFO New best loss: 0.00014282172196544707 on round: 2\n", - "2024-12-10 15:37:22,004 INFO New best loss: 0.0001371056423522532 on round: 3\n", - "2024-12-10 15:37:22,146 INFO New best loss: 0.00013112256419844925 on round: 4\n", - "2024-12-10 15:37:22,404 INFO New best loss: 0.00012842075375374407 on round: 6\n", - "2024-12-10 15:37:22,531 INFO New best loss: 0.00012392438657116145 on round: 7\n", - "2024-12-10 15:37:22,661 INFO New best loss: 0.00011872354662045836 on round: 8\n", - 
"2024-12-10 15:37:22,915 INFO New best loss: 0.00011686021025525406 on round: 10\n", - "2024-12-10 15:37:23,045 INFO New best loss: 0.00011540275590959936 on round: 11\n", - "2024-12-10 15:37:23,176 INFO New best loss: 0.00010958370694424957 on round: 12\n", - "2024-12-10 15:37:23,306 INFO New best loss: 0.00010606442810967565 on round: 13\n", - "2024-12-10 15:37:23,432 INFO New best loss: 0.00010551762534305453 on round: 14\n", - "2024-12-10 15:37:23,558 INFO New best loss: 0.00010321227455278859 on round: 15\n", - "2024-12-10 15:37:23,688 INFO New best loss: 0.00010049617412732914 on round: 16\n", - "2024-12-10 15:37:23,819 INFO New best loss: 9.98983159661293e-05 on round: 17\n", - "2024-12-10 15:37:24,073 INFO New best loss: 9.980545291909948e-05 on round: 19\n", - "2024-12-10 15:37:24,330 INFO New best loss: 9.797140228329226e-05 on round: 21\n", - "2024-12-10 15:37:24,457 INFO New best loss: 9.713656618259847e-05 on round: 22\n", - "2024-12-10 15:37:29,775 INFO New best loss: 9.707298158900812e-05 on round: 63\n", - "2024-12-10 15:37:31,176 INFO New best loss: 9.695763583295047e-05 on round: 75\n", - "2024-12-10 15:37:31,299 INFO New best loss: 9.695166954770684e-05 on round: 76\n", - "2024-12-10 15:37:31,425 INFO New best loss: 9.689731814432889e-05 on round: 77\n", - "/opt/conda/lib/python3.10/site-packages/torchmetrics/utilities/prints.py:70: FutureWarning: Importing `peak_signal_noise_ratio` from `torchmetrics.functional` was deprecated and will be removed in 2.0. 
Import `peak_signal_noise_ratio` from `torchmetrics.image` instead.\n", - " _future_warning(\n" - ] - } - ], + "outputs": [], "source": [ - "from gia_utils.cifar import get_cifar10_loader\n", - "from gia_utils.model import ResNet\n", + "sys.path.append(\"../gia/cifar10_inverting_1_image/\")\n", + "from cifar import get_cifar10_loader\n", + "from model import ResNet\n", "from torchvision.models.resnet import BasicBlock\n", "\n", "from leakpro.attacks.gia_attacks.invertinggradients import InvertingConfig\n", @@ -226,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "d38d6aa1", "metadata": {}, "outputs": [], @@ -241,19 +183,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "a45a0d6b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Files already downloaded and verified\n", - "Files already downloaded and verified\n" - ] - } - ], + "outputs": [], "source": [ "from mia_utils.utils.cifar_data_preparation import get_cifar_dataloader\n", "from mia_utils.utils.cifar_model_preparation import ResNet18, create_trained_model_and_metadata\n", @@ -271,22 +204,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "4cda80cf", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", - " warnings.warn(\n", - "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. 
The current behavior is equivalent to passing `weights=None`.\n", - " warnings.warn(msg)\n", - "Training Progress: 100%|██████████| 3/3 [00:13<00:00, 4.60s/it]\n" - ] - } - ], + "outputs": [], "source": [ "# Train the model\n", "if not os.path.exists(\"target\"):\n", @@ -315,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "f28eb14f", "metadata": {}, "outputs": [ @@ -323,41 +244,311 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-12-10 15:37:48,180 INFO Target model blueprint created from ResNet18 in ./mia_utils/utils/cifar_model_preparation.py.\n", - "2024-12-10 15:37:48,183 INFO Loaded target model metadata from ./target/model_metadata.pkl\n" + "2024-12-11 13:30:05,252 INFO Target model blueprint created from ResNet18 in ./mia_utils/utils/cifar_model_preparation.py.\n", + "2024-12-11 13:30:05,254 INFO Loaded target model metadata from ./target/model_metadata.pkl\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "{'audit': {'random_seed': 1234, 'attack_list': {'population': {'attack_data_fraction': 1.0}}, 'output_dir': './leakpro_output', 'attack_type': 'mia', 'modality': 'image'}, 'target': {'module_path': './mia_utils/utils/cifar_model_preparation.py', 'model_class': 'ResNet18', 'target_folder': './target', 'data_path': './data/cifar10.pkl'}, 'shadow_model': None, 'distillation_model': None}\n" + "{'audit': {'random_seed': 1234, 'attack_list': {'rmia': {'training_data_fraction': 0.5, 'attack_data_fraction': 0.5, 'num_shadow_models': 3, 'online': True, 'temperature': 2, 'gamma': 2.0, 'offline_a': 0.33, 'offline_b': 0.66}, 'population': {'attack_data_fraction': 1.0}, 'lira': {'training_data_fraction': 0.5, 'num_shadow_models': 3, 'online': True}, 'loss_traj': {'training_distill_data_fraction': 0.7, 'number_of_traj': 10, 'label_only': False, 'mia_classifier_epochs': 100}, 'HSJ': {'attack_data_fraction': 0.01, 'target_metadata_path': './target/model_metadata.pkl', 'num_iterations': 2, 
'initial_num_evals': 100, 'max_num_evals': 10000, 'stepsize_search': 'geometric_progression', 'gamma': 1.0, 'constraint': 2, 'batch_size': 50, 'verbose': True, 'epsilon_threshold': '1e-6'}}, 'output_dir': './leakpro_output', 'attack_type': 'mia', 'modality': 'image'}, 'target': {'module_path': './mia_utils/utils/cifar_model_preparation.py', 'model_class': 'ResNet18', 'target_folder': './target', 'data_path': './data/cifar10.pkl'}, 'shadow_model': None, 'distillation_model': None}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2024-12-10 15:37:48,394 INFO Loaded target model from ./target\n", - "2024-12-10 15:37:49,289 INFO Loaded population dataset from ./data/cifar10.pkl\n", - "2024-12-10 15:37:49,290 INFO Loaded population dataset from ./data/cifar10.pkl\n", - "2024-12-10 15:37:49,291 INFO Creating shadow model handler singleton\n", - "2024-12-10 15:37:49,294 INFO Creating distillation model handler singleton\n", - "2024-12-10 15:37:49,296 INFO Configuring the Population attack\n", - "2024-12-10 15:37:49,297 INFO Added attack: population\n", - "2024-12-10 15:37:49,298 INFO Preparing attack: population\n", - "2024-12-10 15:37:49,299 INFO Preparing attack data for training the Population attack\n", - "2024-12-10 15:37:49,306 INFO Subsampling attack data from 24000 points\n", - "2024-12-10 15:37:49,307 INFO Number of attack data points after subsampling: 24000\n", - "2024-12-10 15:37:49,308 INFO Computing signals for the Population attack\n", - "Getting loss for model 1/ 1: 100%|██████████| 750/750 [00:12<00:00, 60.59it/s]\n", - "2024-12-10 15:38:01,752 INFO Running attack: population\n", - "2024-12-10 15:38:01,758 INFO Running the Population attack on the target model\n", - "Getting loss for model 1/ 1: 100%|██████████| 1125/1125 [00:17<00:00, 63.04it/s]\n", - "2024-12-10 15:38:19,692 INFO Attack completed\n", - "2024-12-10 15:38:19,703 INFO Finished attack: population\n", - "2024-12-10 15:38:19,703 INFO Preparing results for attack: 
population\n", - "2024-12-10 15:38:19,704 INFO Auditing completed\n" + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.\n", + " warnings.warn(msg)\n", + "2024-12-11 13:30:05,518 INFO Loaded target model from ./target\n", + "2024-12-11 13:30:06,417 INFO Loaded population dataset from ./data/cifar10.pkl\n", + "2024-12-11 13:30:06,419 INFO Loaded population dataset from ./data/cifar10.pkl\n", + "2024-12-11 13:30:06,420 INFO Creating shadow model handler singleton\n", + "2024-12-11 13:30:06,423 INFO Creating distillation model handler singleton\n", + "2024-12-11 13:30:06,425 INFO Configuring RMIA attack\n", + "2024-12-11 13:30:06,426 INFO Added attack: rmia\n", + "2024-12-11 13:30:06,427 INFO Configuring the Population attack\n", + "2024-12-11 13:30:06,428 INFO Added attack: population\n", + "2024-12-11 13:30:06,429 INFO Added attack: lira\n", + "2024-12-11 13:30:06,430 INFO Configuring Loss trajecatory attack\n", + "2024-12-11 13:30:06,434 INFO Added attack: loss_traj\n", + "2024-12-11 13:30:06,436 INFO Configuring label only attack\n", + "2024-12-11 13:30:06,438 INFO Added attack: HSJ\n", + "2024-12-11 13:30:06,439 INFO Preparing attack: rmia\n", + "2024-12-11 13:30:06,440 INFO Preparing shadow models for RMIA attack\n", + "2024-12-11 13:30:06,441 INFO Preparing attack data for training the RMIA attack\n", + "2024-12-11 13:30:06,447 INFO Check for 3 shadow models (dataset: 60000 points)\n", + "2024-12-11 13:30:07,605 INFO Number of existing models exceeds or equals the number of models to create\n", + "2024-12-11 
13:30:07,607 INFO Loading shadow model 2\n", + "2024-12-11 13:30:07,832 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_2.pkl\n", + "2024-12-11 13:30:07,833 INFO Loading shadow model 1\n", + "2024-12-11 13:30:08,059 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_1.pkl\n", + "2024-12-11 13:30:08,060 INFO Loading shadow model 0\n", + "2024-12-11 13:30:08,274 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_0.pkl\n", + "2024-12-11 13:30:08,279 INFO Running attack: rmia\n", + "2024-12-11 13:30:08,280 INFO Running RMIA online attack\n", + "2024-12-11 13:30:08,281 INFO Loading metadata 2\n", + "2024-12-11 13:30:08,285 INFO Loading metadata 1\n", + "2024-12-11 13:30:08,287 INFO Loading metadata 0\n", + "2024-12-11 13:30:08,881 INFO Number of points in the audit dataset that are used for online attack: 26907\n", + "2024-12-11 13:31:51,245 INFO Subsampling attack data from 24000 points \n", + "2024-12-11 13:31:51,249 INFO Number of attack data points after subsampling: 12000\n", + "2024-12-11 13:32:41,414 INFO Finished attack: rmia \n", + "2024-12-11 13:32:41,416 INFO Preparing attack: population\n", + "2024-12-11 13:32:41,417 INFO Preparing attack data for training the Population attack\n", + "2024-12-11 13:32:41,423 INFO Subsampling attack data from 24000 points\n", + "2024-12-11 13:32:41,425 INFO Number of attack data points after subsampling: 24000\n", + "2024-12-11 13:32:41,426 INFO Computing signals for the Population attack\n", + "Getting loss for model 1/ 1: 100%|██████████| 750/750 [00:13<00:00, 57.54it/s]\n", + "2024-12-11 13:32:54,560 INFO Running attack: population\n", + "2024-12-11 13:32:54,565 INFO Running the Population attack on the target model\n", + "Getting loss for model 1/ 1: 100%|██████████| 1125/1125 [00:17<00:00, 64.57it/s]\n", + "2024-12-11 13:33:12,093 INFO Attack completed\n", + "2024-12-11 13:33:12,105 INFO Finished attack: population\n", + 
"2024-12-11 13:33:12,106 INFO Preparing attack: lira\n", + "2024-12-11 13:33:12,131 INFO Number of existing models exceeds or equals the number of models to create\n", + "2024-12-11 13:33:12,132 INFO Loading shadow model 2\n", + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.\n", + " warnings.warn(msg)\n", + "2024-12-11 13:33:12,340 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_2.pkl\n", + "2024-12-11 13:33:12,341 INFO Loading shadow model 1\n", + "2024-12-11 13:33:12,543 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_1.pkl\n", + "2024-12-11 13:33:12,544 INFO Loading shadow model 0\n", + "2024-12-11 13:33:12,745 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_0.pkl\n", + "2024-12-11 13:33:12,748 INFO Create masks for all IN and OUT samples\n", + "2024-12-11 13:33:12,749 INFO Loading metadata 2\n", + "2024-12-11 13:33:12,750 INFO Loading metadata 1\n", + "2024-12-11 13:33:12,752 INFO Loading metadata 0\n", + "2024-12-11 13:33:12,761 INFO Calculating the logits for all 3 shadow models\n", + "2024-12-11 13:34:25,365 INFO Calculating the logits for the target model \n", + "2024-12-11 13:34:51,520 INFO Running attack: lira \n", + "Processing audit samples: 100%|██████████| 26907/26907 [00:06<00:00, 4483.96it/s]\n", + "2024-12-11 13:34:57,628 INFO Finished attack: lira\n", + "2024-12-11 13:34:57,630 INFO Preparing attack: loss_traj\n", + "2024-12-11 13:34:57,631 INFO Preparing the data for loss trajectory attack\n", + 
"2024-12-11 13:34:57,640 INFO Training shadow models on 3600 points\n", + "2024-12-11 13:34:57,647 INFO Number of existing models exceeds or equals the number of models to create\n", + "2024-12-11 13:34:57,648 INFO Loading shadow model 6\n", + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/opt/conda/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.\n", + " warnings.warn(msg)\n", + "2024-12-11 13:34:57,858 INFO Loaded model from ./leakpro_output/attack_objects/shadow_model/shadow_model_6.pkl\n", + "2024-12-11 13:34:58,208 INFO Training distillation of the shadow model on 21147 points\n", + "2024-12-11 13:34:58,276 INFO Created distillation dataset with size 21147\n", + "Epoch 1/10: 100%|██████████| 166/166 [00:07<00:00, 21.51it/s]\n", + "2024-12-11 13:35:05,996 INFO Epoch 1/10 | Loss: 63.67065370082855\n", + "2024-12-11 13:35:06,119 INFO Saved distillation model for epoch 0 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:06,121 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:06,126 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 2/10: 100%|██████████| 166/166 [00:07<00:00, 21.17it/s]\n", + "2024-12-11 13:35:13,972 INFO Epoch 2/10 | Loss: 9.804248925298452\n", + "2024-12-11 13:35:14,085 INFO Saved distillation model for epoch 1 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:14,087 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:14,091 INFO Metadata for distillation model stored in 
./leakpro_output/attack_objects/distillation_model\n", + "Epoch 3/10: 100%|██████████| 166/166 [00:07<00:00, 20.81it/s]\n", + "2024-12-11 13:35:22,074 INFO Epoch 3/10 | Loss: 5.905309362336993\n", + "2024-12-11 13:35:22,189 INFO Saved distillation model for epoch 2 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:22,191 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:22,194 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 4/10: 100%|██████████| 166/166 [00:08<00:00, 19.90it/s]\n", + "2024-12-11 13:35:30,539 INFO Epoch 4/10 | Loss: 4.59359712805599\n", + "2024-12-11 13:35:30,672 INFO Saved distillation model for epoch 3 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:30,674 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:30,678 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 5/10: 100%|██████████| 166/166 [00:08<00:00, 19.77it/s]\n", + "2024-12-11 13:35:39,080 INFO Epoch 5/10 | Loss: 4.146416590549052\n", + "2024-12-11 13:35:39,197 INFO Saved distillation model for epoch 4 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:39,198 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:39,202 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 6/10: 100%|██████████| 166/166 [00:08<00:00, 19.94it/s]\n", + "2024-12-11 13:35:47,532 INFO Epoch 6/10 | Loss: 3.597899131476879\n", + "2024-12-11 13:35:47,654 INFO Saved distillation model for epoch 5 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:47,656 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:47,660 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 7/10: 100%|██████████| 166/166 
[00:07<00:00, 20.79it/s]\n", + "2024-12-11 13:35:55,650 INFO Epoch 7/10 | Loss: 3.298955911770463\n", + "2024-12-11 13:35:55,765 INFO Saved distillation model for epoch 6 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:35:55,767 INFO Storing metadata for distillation model\n", + "2024-12-11 13:35:55,771 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 8/10: 100%|██████████| 166/166 [00:07<00:00, 21.17it/s]\n", + "2024-12-11 13:36:03,616 INFO Epoch 8/10 | Loss: 3.090168266557157\n", + "2024-12-11 13:36:03,730 INFO Saved distillation model for epoch 7 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:03,731 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:03,736 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 9/10: 100%|██████████| 166/166 [00:07<00:00, 21.07it/s]\n", + "2024-12-11 13:36:11,618 INFO Epoch 9/10 | Loss: 2.8429356180131435\n", + "2024-12-11 13:36:11,731 INFO Saved distillation model for epoch 8 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:11,732 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:11,736 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 10/10: 100%|██████████| 166/166 [00:07<00:00, 21.02it/s]\n", + "2024-12-11 13:36:19,639 INFO Epoch 10/10 | Loss: 2.593701013363898\n", + "2024-12-11 13:36:19,759 INFO Saved distillation model for epoch 9 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:19,760 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:19,764 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:19,914 INFO Created distillation dataset with size 21147\n", + "Epoch 1/10: 100%|██████████| 166/166 [00:07<00:00, 
21.29it/s]\n", + "2024-12-11 13:36:27,717 INFO Epoch 1/10 | Loss: 114.02811643481255\n", + "2024-12-11 13:36:27,829 INFO Saved distillation model for epoch 0 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:27,831 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:27,837 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 2/10: 100%|██████████| 166/166 [00:07<00:00, 20.76it/s]\n", + "2024-12-11 13:36:35,838 INFO Epoch 2/10 | Loss: 44.576862797141075\n", + "2024-12-11 13:36:35,950 INFO Saved distillation model for epoch 1 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:35,951 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:35,956 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 3/10: 100%|██████████| 166/166 [00:07<00:00, 20.76it/s]\n", + "2024-12-11 13:36:43,958 INFO Epoch 3/10 | Loss: 28.83185875415802\n", + "2024-12-11 13:36:44,077 INFO Saved distillation model for epoch 2 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:44,079 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:44,083 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 4/10: 100%|██████████| 166/166 [00:08<00:00, 20.59it/s]\n", + "2024-12-11 13:36:52,150 INFO Epoch 4/10 | Loss: 21.28936092555523\n", + "2024-12-11 13:36:52,270 INFO Saved distillation model for epoch 3 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:36:52,272 INFO Storing metadata for distillation model\n", + "2024-12-11 13:36:52,277 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 5/10: 100%|██████████| 166/166 [00:08<00:00, 20.58it/s]\n", + "2024-12-11 13:37:00,346 INFO Epoch 5/10 | Loss: 16.496566824615\n", + "2024-12-11 
13:37:00,457 INFO Saved distillation model for epoch 4 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:00,459 INFO Storing metadata for distillation model\n", + "2024-12-11 13:37:00,463 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 6/10: 100%|██████████| 166/166 [00:08<00:00, 20.54it/s]\n", + "2024-12-11 13:37:08,548 INFO Epoch 6/10 | Loss: 13.477496419101954\n", + "2024-12-11 13:37:08,660 INFO Saved distillation model for epoch 5 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:08,662 INFO Storing metadata for distillation model\n", + "2024-12-11 13:37:08,666 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 7/10: 100%|██████████| 166/166 [00:08<00:00, 20.69it/s]\n", + "2024-12-11 13:37:16,693 INFO Epoch 7/10 | Loss: 11.912189535796642\n", + "2024-12-11 13:37:16,812 INFO Saved distillation model for epoch 6 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:16,814 INFO Storing metadata for distillation model\n", + "2024-12-11 13:37:16,818 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 8/10: 100%|██████████| 166/166 [00:08<00:00, 20.62it/s]\n", + "2024-12-11 13:37:24,872 INFO Epoch 8/10 | Loss: 11.11345386132598\n", + "2024-12-11 13:37:24,991 INFO Saved distillation model for epoch 7 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:24,992 INFO Storing metadata for distillation model\n", + "2024-12-11 13:37:24,997 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 9/10: 100%|██████████| 166/166 [00:08<00:00, 20.44it/s]\n", + "2024-12-11 13:37:33,121 INFO Epoch 9/10 | Loss: 10.197872441262007\n", + "2024-12-11 13:37:33,241 INFO Saved distillation model for epoch 8 to 
./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:33,242 INFO Storing metadata for distillation model\n", + "2024-12-11 13:37:33,247 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "Epoch 10/10: 100%|██████████| 166/166 [00:08<00:00, 20.26it/s]\n", + "2024-12-11 13:37:41,445 INFO Epoch 10/10 | Loss: 9.438245516270399\n", + "2024-12-11 13:37:41,565 INFO Saved distillation model for epoch 9 to ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:41,566 INFO Storing metadata for distillation model\n", + "2024-12-11 13:37:41,570 INFO Metadata for distillation model stored in ./leakpro_output/attack_objects/distillation_model\n", + "2024-12-11 13:37:41,595 INFO Loading MIA trajectory_train_data.pkl: 7200 points\n", + "2024-12-11 13:37:41,602 INFO Loading MIA trajectory_test_data.pkl: 36000 points\n", + "2024-12-11 13:37:41,610 INFO Running attack: loss_traj\n", + "2024-12-11 13:37:41,614 INFO Loading Loss Trajectory classifier\n", + "2024-12-11 13:37:41,615 INFO Running the MIA attack\n", + "100%|██████████| 563/563 [00:00<00:00, 823.90it/s]\n", + "2024-12-11 13:37:42,932 INFO Finished attack: loss_traj\n", + "2024-12-11 13:37:42,933 INFO Preparing attack: HSJ\n", + "2024-12-11 13:37:42,934 INFO Preparing the data for Hop Skip Jump attack\n", + "2024-12-11 13:37:42,938 INFO Running attack: HSJ\n", + "2024-12-11 13:37:42,939 INFO Running Hop Skip Jump distance attack\n", + "Epoch: 100%|██████████| 8/8 [00:00<00:00, 131.66it/s]\n", + "2024-12-11 13:37:44,287 INFO All data points in the batch have been successfully perturbed by random noise after 20 evaluations.\n", + "Batch: 0%| | 0/8 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -434,9 +597,6 @@ } ], "source": [ - "import sys\n", - "sys.path.append(\"../..\")\n", - "\n", "# Import and initialize ReportHandler\n", "from leakpro.reporting.report_handler import ReportHandler\n", "\n", @@ -445,13 +605,13 @@ "# existing results folder\n", "report_handler = ReportHandler(report_dir=\"./leakpro_output/results\")\n", "\n", - "# # Save Synthetic results using the ReportHandler\n", + "# Save Synthetic results using the ReportHandler\n", "report_handler.save_results(attack_name=\"singling_out\", result_data=sin_out_res)\n", "report_handler.save_results(attack_name=\"linkability_risk\", result_data=link_res)\n", "report_handler.save_results(attack_name=\"inference_risk_base\", result_data=inf_res)\n", "report_handler.save_results(attack_name=\"inference_risk_worst\", result_data=inf_res_worst)\n", "\n", - "# # Save GIA results using report handler\n", + "# Save GIA results using report handler\n", "report_handler.save_results(attack_name=\"gia\", result_data=GIA_result)\n", "\n", "# Save MIA resuls using report handler\n", @@ -461,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 3, "id": "1d91c7e0", "metadata": {}, "outputs": [ @@ -469,13 +629,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-12-10 15:38:59,171 INFO PDF compiled\n" + "2024-12-11 13:45:59,879 INFO PDF compiled\n" ] }, { "data": { "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -484,7 +644,7 @@ { "data": { "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -493,7 +653,7 @@ { "data": { "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -502,7 +662,7 @@ { "data": { "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -511,7 +671,7 @@ { "data": { "text/plain": [ - "
" + "
" ] }, "metadata": {}, diff --git a/leakpro/metrics/attack_result.py b/leakpro/metrics/attack_result.py index cf5a7cd6..27816388 100755 --- a/leakpro/metrics/attack_result.py +++ b/leakpro/metrics/attack_result.py @@ -276,7 +276,7 @@ def save(self:Self, path: str, name: str, config:dict = None, show_plot:bool = F # Get the name for the attack configuration config_name = get_config_name(result_config) - self.id = f"{name}{config_name}" + self.id = f"{name}{config_name}".replace("/", "__") save_path = f"{path}/{name}/{self.id}" # Data to be saved