User code handler #63

Merged · 16 commits · Jun 10, 2024

Changes from all commits
49 changes: 24 additions & 25 deletions config/audit.yaml
@@ -2,31 +2,26 @@ audit: # Configurations for auditing
random_seed: 1234 # Integer specifying the random seed
attack_list:
rmia:
training_data_fraction: 0.4 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
attack_data_fraction: 0.1 # Fraction of auxiliary dataset to sample from during attack
num_shadow_models: 8 # Number of shadow models to train
training_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
attack_data_fraction: 0.025 # Fraction of auxiliary dataset to sample from during attack
num_shadow_models: 2 # Number of shadow models to train
online: False # perform online or offline attack
temperature: 2
gamma: 2.0
offline_a: 0.33 # parameter from which we compute p(x) from p_OUT(x) such that p_IN(x) = a p_OUT(x) + b.
offline_b: 0.66
qmia:
training_data_fraction: 0.5 # Fraction of the auxilary dataset (data without train and test indices) to use for training the quantile regressor
epochs: 5 # Number of training epochs for quantile regression
population:
attack_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack
loss_traj:
training_distill_data_fraction : 0.2 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2
number_of_traj: 20 # Number of epochs (number of points in the loss trajectory)
attack_mode: "soft_label" # label_only, soft_label
attack_data_dir: "./leakpro_output/attack_objects/loss_traj"
mia_classifier_epochs: 100
lira:
training_data_fraction: 0.4 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
training_data_fraction: 0.1 # Fraction of the auxilary dataset to use for this attack (in each shadow model training)
num_shadow_models: 8 # Number of shadow models to train
online: false # perform online or offline attack
online: False # perform online or offline attack
fixed_variance: True # Use a fixed variance for the whole audit

loss_traj:
training_distill_data_fraction : 0.2 # Fraction of the auxilary dataset to use for training the distillation models D_s = (1-D_KD)/2
number_of_traj: 1 # Number of epochs (number of points in the loss trajectory)
label_only: "False" # True or False
attack_data_dir: "./leakpro_output/attack_objects/loss_traj"
mia_classifier_epochs: 10

report_log: "./leakpro_output/results" # Folder to save the auditing report
config_log: "./leakpro_output/config" # Folder to save the configuration files
@@ -36,21 +31,21 @@ audit: # Configurations for auditing
split_method: "no_overlapping" # Method of creating the attack dataset

target:
# Target model path
module_path: "./leakpro/shadow_model_blueprints.py"
model_class: "ResNet18"
# Data paths
trained_model_path: "./target/target_model.pkl"
trained_model_metadata_path: "./target/model_metadata.pkl"
data_path: "./target/data/cinic10.pkl"


shadow_model:
storage_path: "./leakpro_output/attack_objects/shadow_models"
# Path to a Python file with the shadow model architecture
module_path: "./leakpro/shadow_model_blueprints.py"
#
# [Optional] Define a shadow model (if none, shadow model will follow the target model)
# Path to a Python file with the shadow model architecture
#module_path: "./leakpro/shadow_model_blueprints.py"
# Name of the class to instantiate from the specified file
model_class_path: "ResNet18" #"ConvNet"
model_class: "ResNet18" #"ConvNet"
optimizer:
name: sgd #adam, sgd, rmsprop
lr: 0.01
@@ -61,10 +56,10 @@ shadow_model:
# Initialization parameters
init_params: {}

distillation_target_model:
storage_path: "./leakpro_output/attack_objects/distillation_target_models"
module_path: "./leakpro/shadow_model_blueprints.py"
# model_class: "ConvNet"
distillation_model:
storage_path: "./leakpro_output/attack_objects/distillation_models"
#module_path: "./leakpro/shadow_model_blueprints.py"
#model_class: "ConvNet"
optimizer:
name: sgd #adam, sgd, rmsprop
lr: 0.01
@@ -74,6 +69,9 @@ distillation_target_model:
name: crossentropyloss # crossentropyloss, nllloss, mseloss
# Initialization parameters
init_params: {}
<<<<<<< HEAD

=======
trained_model_path: "./leakpro_output/attack_objects/distillation_target_models/distillation_model.pkl"
trained_model_metadata_path: "./leakpro_output/attack_objects/distillation_target_models/model_metadata.pkl"
data_path: "./leakpro_output/attack_objects/distillation_target_models/cinic10.pkl"
@@ -94,5 +92,6 @@ distillation_shadow_model:
trained_model_path: "./leakpro_output/attack_objects/distillation_shadow_models/distillation_model.pkl"
trained_model_metadata_path: "./leakpro_output/attack_objects/distillation_shadow_models/model_metadata.pkl"
data_path: "./leakpro_output/attack_objects/distillation_shadow_models/cinic10.pkl"
>>>>>>> main
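
Note: as a quick orientation for the config file above, here is a minimal sketch of loading it with PyYAML and reading one attack's parameters. The path and key names follow the file shown in this diff; the snippet itself is illustrative and not part of the PR.

```python
import yaml

# Load the audit configuration shown above (path as in the repository).
with open("./config/audit.yaml", "rb") as f:
    configs = yaml.safe_load(f)

audit = configs["audit"]
# The attacks to run are the keys of attack_list, e.g. rmia, qmia, population, loss_traj, lira.
print("attacks:", list(audit["attack_list"].keys()))

# Per-attack parameters, e.g. the LiRA section as changed by this PR.
lira = audit["attack_list"]["lira"]
print(lira["num_shadow_models"], lira["training_data_fraction"], lira["online"])
```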


49 changes: 7 additions & 42 deletions leakpro.py
@@ -6,10 +6,9 @@
import time
from pathlib import Path

import joblib
import numpy as np
import yaml
from torch import load, manual_seed
from torch import manual_seed
from torch.utils.data import Subset

import leakpro.dev_utils.train as utils
Expand All @@ -23,7 +22,7 @@
prepare_train_test_datasets,
)
from leakpro.reporting.utils import prepare_priavcy_risk_report
from leakpro.utils.input_handler import get_class_from_module, import_module_from_file
from leakpro.user_inputs.cifar10_input_handler import Cifar10InputHandler


def setup_log(name: str, save_file: bool=True) -> logging.Logger:
@@ -96,16 +95,13 @@ def generate_user_input(configs: dict, logger: logging.Logger)->None:

if __name__ == "__main__":


#args = "./config/adult.yaml" # noqa: ERA001
# user_args = "./config/dev_config/cifar10.yaml" # noqa: ERA001
user_args = "./config/dev_config/cinic10.yaml" # noqa: ERA001

with open(user_args, "rb") as f:
user_configs = yaml.safe_load(f)

# Setup logger
logger = setup_log("analysis")
logger = setup_log("LeakPro", save_file=True)

# Generate user input
generate_user_input(user_configs, logger) # This is for developing purposes only
@@ -126,41 +122,10 @@ def generate_user_input(configs: dict, logger: logging.Logger)->None:
report_dir = f"{configs['audit']['report_log']}"
Path(report_dir).mkdir(parents=True, exist_ok=True)

# Get the target metadata
target_model_metadata_path = f'{configs["target"]["trained_model_metadata_path"]}'
try:
with open(target_model_metadata_path, "rb") as f:
target_model_metadata = joblib.load(f)
except FileNotFoundError:
logger.error(f"Could not find the target model metadata at {target_model_metadata_path}")

# Create a class instance of target model
target_module = import_module_from_file(configs["target"]["module_path"])
target_model_blueprint = get_class_from_module(target_module, configs["target"]["model_class"])
logger.info(f"Target model blueprint created from {configs['target']['model_class']} in {configs['target']['module_path']}")

# Load the target model parameters into the blueprint
with open(configs["target"]["trained_model_path"], "rb") as f:
target_model = target_model_blueprint(**target_model_metadata["model_metadata"]["init_params"])
target_model.load_state_dict(load(f))
logger.info(f"Loaded target model from {configs['target']['trained_model_path']}")

# Get the population dataset
try:
with open(configs["target"]["data_path"], "rb") as file:
population = joblib.load(file)
logger.info(f"Loaded population dataset from {configs['target']['data_path']}")
except FileNotFoundError:
logger.error(f"Could not find the population dataset at {configs['target']['data_path']}")
# ------------------------------------------------
# Now we have the target model, its metadata, and the train/test dataset indices.
attack_scheduler = AttackScheduler(
population,
target_model,
target_model_metadata["model_metadata"],
configs,
logger,
)
# Create user input handler
handler = Cifar10InputHandler(configs=configs, logger=logger)

attack_scheduler = AttackScheduler(handler)
audit_results = attack_scheduler.run_attacks()

for attack_name in audit_results:
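
For context, the entry-point flow after this PR reduces to building a handler and handing it to the scheduler. A minimal sketch following the diff above (import paths inferred from the repository layout; the handler presumably also carries the target model and data access that the attacks need, which is why the manual metadata/model/population loading could be removed from leakpro.py):

```python
import logging

import yaml

from leakpro.attacks.attack_scheduler import AttackScheduler  # path per leakpro/attacks/attack_scheduler.py
from leakpro.user_inputs.cifar10_input_handler import Cifar10InputHandler

# Load a configuration with the structure of config/audit.yaml
# (leakpro.py itself loads a dev config and prepares it first).
with open("./config/audit.yaml", "rb") as f:
    configs = yaml.safe_load(f)

logger = logging.getLogger("LeakPro")

# The user input handler bundles configs and logger for the attacks.
handler = Cifar10InputHandler(configs=configs, logger=logger)

attack_scheduler = AttackScheduler(handler)     # the scheduler now takes only the handler
audit_results = attack_scheduler.run_attacks()  # dict keyed by attack name
for attack_name in audit_results:
    ...  # report per attack, as in leakpro.py
```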
32 changes: 10 additions & 22 deletions leakpro/attacks/attack_scheduler.py
@@ -1,12 +1,9 @@
"""Module that contains the AttackScheduler class, which is responsible for creating and executing attacks."""
import logging

from torch import nn

from leakpro.attacks.mia_attacks.abstract_mia import AbstractMIA
from leakpro.attacks.mia_attacks.attack_factory_mia import AttackFactoryMIA
from leakpro.dataset import GeneralDataset
from leakpro.import_helper import Any, Dict, Self
from leakpro.user_inputs.abstract_input_handler import AbstractInputHandler


class AttackScheduler:
@@ -16,47 +13,38 @@ class AttackScheduler:

def __init__(
self:Self,
population:GeneralDataset,
target_model:nn.Module,
target_model_metadata:Dict[str, Any],
configs:Dict[str, Any],
logger:logging.Logger
handler: AbstractInputHandler,
) -> None:
"""Initialize the AttackScheduler class.

Args:
----
population (GeneralDataset): The population dataset.
target_model (torch.nn.Module): The target model.
target_model_metadata (Dict[str, Any]): The metadata of the target model.
configs (Dict[str, Any]): The configurations.
logger (logging.Logger): The logger object.
handler (AbstractInputHandler): The handler object that contains the user inputs.

"""
configs = handler.configs
if configs["audit"]["attack_type"] not in list(self.attack_type_to_factory.keys()):
raise ValueError(
f"Unknown attack type: {configs['audit']['attack_type']}. "
f"Supported attack types: {self.attack_type_to_factory.keys()}"
)

# Prepare factory with shared items
# Prepare factory
factory = self.attack_type_to_factory[configs["audit"]["attack_type"]]
factory.set_population_and_audit_data(population,target_model_metadata)
factory.set_target_model_and_loss(target_model, nn.CrossEntropyLoss()) #TODO: Enable arbitrary loss functions
factory.set_logger(logger)

self.logger = logger
self.logger = handler.logger

# Create the attacks
self.attack_list = list(configs["audit"]["attack_list"].keys())
self.attacks = []
for attack_name in self.attack_list:
try:
attack = factory.create_attack(attack_name, configs)
attack = factory.create_attack(attack_name, handler)
self.add_attack(attack)
self.logger.info(f"Added attack: {attack_name}")
except ValueError as e:
logger.info(e)
self.logger.info(e)
self.logger.info(f"Failed to create attack: {attack_name}, supported attacks: {factory.attack_classes.keys()}")

def add_attack(self:Self, attack: AbstractMIA) -> None:
"""Add an attack to the list of attacks."""
@@ -77,7 +65,7 @@ def run_attacks(self:Self) -> Dict[str, Any]:
self.logger.info(f"Finished attack: {attack_type}")
return results

def identify_attacks(self:Self) -> None:
def map_setting_to_attacks(self:Self) -> None:
"""Identify relevant attacks based on adversary setting."""
# TODO: Implement this mapping and remove attack list from configs
pass
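
To make the new constructor contract concrete: in this diff the scheduler only reads handler.configs and handler.logger directly and forwards the whole handler to the attack factory. The following stub is hypothetical (the real AbstractInputHandler also provides the model, data, and training hooks the attacks use); it only mirrors the two attributes visible above.

```python
import logging
from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class HandlerStub:
    """Hypothetical stand-in exposing only what this diff reads from the handler."""

    configs: Dict[str, Any]
    logger: logging.Logger = field(default_factory=lambda: logging.getLogger("LeakPro"))


# The constructor resolves a factory from configs["audit"]["attack_type"]
# (the value must be a key of AttackScheduler.attack_type_to_factory; "mia"
# is assumed here) and then calls factory.create_attack(attack_name, handler)
# for every entry in attack_list.
stub = HandlerStub(configs={"audit": {"attack_type": "mia", "attack_list": {"lira": {}}}})
```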